头图
Uploading large files will consume a lot of time, and the upload may fail in the middle. At this time, we need front-end and back-end cooperation to solve this problem.

Solution steps:

  1. File sharding to reduce the time consumed by each request
  2. Notify the server to merge file shards
  3. Control the number of concurrent requests to avoid browser memory overflow
  4. When a request fails due to network or other reasons, we resend the request
  5. http

Sharding and merging of files

In JavaScript, the FILE object is a subclass of the ' Blob ' object, which contains an important method slice, by which we can split the binary file like this:

 <!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    <script src="https://cdn.bootcdn.net/ajax/libs/axios/0.24.0/axios.min.js"></script>
</head>
<body>
    <input type="file" multiple="multiple" id="fileInput" />
    <button onclick="SliceUpload()">上传</button>  
    <script>
        function SliceUpload() {
            const file = document.getElementById('fileInput').files[0]
            if (!file) return

            // 文件分片
            let size = 1024 * 50; //50KB 50KB Section size
            let fileChunks = [];
            let index = 0;        //Section num
            for (let cur = 0; cur < file.size; cur += size) {
                fileChunks.push({
                    hash: index++,
                    chunk: file.slice(cur, cur + size),
                });
            }

            // 上传分片
            const uploadList = fileChunks.map((item, index) => {
                let formData = new FormData();
                formData.append("filename", file.name);
                formData.append("hash", item.hash);
                formData.append("chunk", item.chunk);
                return axios({
                    method: "post",
                    url: "/upload",
                    data: formData,
                });
            });
            await Promise.all(uploadList);

            // 所有分片上传完成,通知服务器合并分片
            await axios({
                method: "get",
                url: "/merge",
                params: {
                    filename: file.name,
                },
            });
            console.log("Upload to complete");
        }
    </script>
</body>
</html>

concurrency control

If the file is large, there will be many shards, and the browser will initiate a large number of requests in a short period of time, which may lead to memory exhaustion, so concurrency control is required.

Here we combine the Promise.race() method to control the number of concurrent requests to avoid browser memory overflow.

 // 加入并发控制
async function SliceUpload() {
    const file = document.getElementById('fileInput').files[0]
    if (!file) return

    // 文件分片
    let size = 1024 * 50; //50KB 50KB Section size
    let fileChunks = [];
    let index = 0;        //Section num
    for (let cur = 0; cur < file.size; cur += size) {
        fileChunks.push({
            hash: index++,
            chunk: file.slice(cur, cur + size),
        });
    }

    let pool = []; //Concurrent pool
    let max = 3; //Maximum concurrency
    for (let i = 0; i < fileChunks.length; i++) {
        let item = fileChunks[i];
        let formData = new FormData();
        formData.append("filename", file.name);
        formData.append("hash", item.hash);
        formData.append("chunk", item.chunk);

        // 上传分片
        let task = axios({
            method: "post",
            url: "/upload",
            data: formData,
        });
        task.then(() => {
        // 从并发池中移除已经完成的请求
        let index = pool.findIndex((t) => t === task);
            pool.splice(index);
        });

        // 把请求放入并发池中,如果已经达到最大并发量
        pool.push(task);
        if (pool.length === max) {
            //All requests are requested complete
            await Promise.race(pool);
        }
    }

    // 所有分片上传完成,通知服务器合并分片
    await axios({
        method: "get",
        url: "/merge",
        params: {
            filename: file.name,
        },
    });
    console.log("Upload to complete");
}

make code reusable

 function SliceUpload() {
    const file = document.getElementById('fileInput').files[0]
    if (!file) return

    // 文件分片
    let size = 1024 * 50; // 分片大小设置
    let fileChunks = [];
    let index = 0;        // 分片序号
    for (let cur = 0; cur < file.size; cur += size) {
        fileChunks.push({
            hash: index++,
            chunk: file.slice(cur, cur + size),
        });
    }

    const uploadFileChunks = async function(list){
        if(list.length === 0){
            // 所有分片上传完成,通知如无
            await axios({
                method: 'get',
                url: '/merge',
                params: {
                    filename: file.name
                }
            });
            console.log('Upload to complete')
            return
        }

        let pool = []       // 并发池
        let max = 3         // 最大并发数
        let finish = 0      // 完成数量
        let failList = []   // 失败列表
        for(let i=0;i<list.length;i++){
            let item = list[i]
            let formData = new FormData()
            formData.append('filename', file.name)
            formData.append('hash', item.hash)
            formData.append('chunk', item.chunk)
            
            let task = axios({
                method: 'post',
                url: '/upload',
                data: formData
            })

            task.then((data)=>{
                // 从并发池中移除已经完成的请求
                let index = pool.findIndex(t=> t===task)
                pool.splice(index)
            }).catch(()=>{
                failList.push(item)
            }).finally(()=>{
                finish++
                // 如果有失败的重新上传
                if(finish===list.length){
                    uploadFileChunks(failList)
                }
            })
            pool.push(task)
            if(pool.length === max){
                await Promise.race(pool)
            }
        }
    }

    uploadFileChunks(fileChunks)
}

Server interface implementation

 const express = require('express')
const multiparty = require('multiparty')
const fs = require('fs')
const path = require('path')
const { Buffer } = require('buffer')
// file path
const STATIC_FILES = path.join(__dirname, './static/files')
// Temporary path to upload files
const STATIC_TEMPORARY = path.join(__dirname, './static/temporary')
const server = express()
// Static file hosting
server.use(express.static(path.join(__dirname, './dist')))
// Interface for uploading slices
server.post('/upload', (req, res) => {
    const form = new multiparty.Form();
    form.parse(req, function(err, fields, files) {
        let filename = fields.filename[0]
        let hash = fields.hash[0]
        let chunk = files.chunk[0]
        let dir = `${STATIC_TEMPORARY}/${filename}`
        // console.log(filename, hash, chunk)
        try {
            if (!fs.existsSync(dir)) fs.mkdirSync(dir)
            const buffer = fs.readFileSync(chunk.path)
            const ws = fs.createWriteStream(`${dir}/${hash}`)
            ws.write(buffer)
            ws.close()
            res.send(`${filename}-${hash} Section uploaded successfully`)
        } catch (error) {
            console.error(error)
            res.status(500).send(`${filename}-${hash} Section uploading failed`)
        }
    })
})

//Merged slice interface
server.get('/merge', async (req, res) => {
    const { filename } = req.query
    try {
        let len = 0
        const bufferList = fs.readdirSync(`${STATIC_TEMPORARY}/${filename}`).map((hash,index) => {
            const buffer = fs.readFileSync(`${STATIC_TEMPORARY}/${filename}/${index}`)
            len += buffer.length
            return buffer
        });
        //Merge files
        const buffer = Buffer.concat(bufferList, len);
        const ws = fs.createWriteStream(`${STATIC_FILES}/${filename}`)
        ws.write(buffer);
        ws.close();
        res.send(`Section merge completed`);
    } catch (error) {
        console.error(error);
    }
})

server.listen(3000, _ => {
    console.log('http://localhost:3000/')
})
The file upload is implemented above, but if the file is not uploaded completely due to some accidents when half of the file is uploaded, will the above program continue to upload the same file next time? The answer is: no. It also uploads slices of all files from scratch.
If we want to resume the upload from a breakpoint, we need to know the total size of the uploaded slices, and then use it as the starting point of the slice, and continue to upload the file slice.

http

According to the above idea, you need to add a breakpoint detection interface, and call it before uploading the file:

 // 检测断点
server.get('/checkpoint', (req, res) => {
  const { filename } = req.query;
  const dir = `${STATIC_TEMPORARY}/${filename}`;
  try {
    if (!fs.existsSync(dir)) {
      res.send({ success: true, point: 0, hash: 0 });
    } else {
      let len = 0;
      let hash = 0;
      fs.readdirSync(`${STATIC_TEMPORARY}/${filename}`).forEach((item, index) => {
        const buffer = fs.readFileSync(`${STATIC_TEMPORARY}/${filename}/${index}`);
        len += buffer.length;
        hash += 1;
      });
      return res.send({ success: true, point: len, hash: hash - 1 });
    }
  } catch (error) {
    console.error(error);
    res.status(500).send({ success: false, msg: error });
  }
});

In this way, the breakpoint is detected before uploading. If the file has been uploaded before, after calling the above interface, the front-end directly processes the file upload as complete. Otherwise, the upload progress is calculated according to the point, and the index of the resuming fragment is specified according to the hash.

At this point, the large file is uploaded in pieces, and the function of resuming the upload from a breakpoint is completed.

The article was first published on IICCOM-Technical Blog "Node.js Realizes Fragmented Upload"


来了老弟
508 声望31 粉丝

纸上得来终觉浅,绝知此事要躬行