Tags: node.js, amazon-web-services, amazon-s3, amazon-ec2, busboy

Node JS - Stream data from Busboy to AWS S3


I am trying to upload a file to S3 via EC2. My first approach was to upload the file to EC2 completely and then upload that file to S3. This approach is not good because the EC2-to-S3 transfer only starts after the first upload has finished, so its transfer time is wasted.

Currently I am trying to feed the busboy upload stream into the S3 upload stream, so that uploading to EC2 and transferring from EC2 to S3 happen simultaneously, since the S3 "upload" method supports a stream as the upload Body.

Here is my code -

/**
 * POST /s3StreamUpload
 *
 * Parses the multipart request with Busboy and, for every file part, starts
 * an S3 managed upload that uses the busboy file stream as the upload Body.
 *
 * The 303 redirect is sent when Busboy has finished parsing the form; the S3
 * completion callback only logs its outcome and does not affect the response.
 */
router.post('/s3StreamUpload', function(req, res, next) {
   var busboy = new Busboy({headers: req.headers});
   busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
      console.log('Before Upload: ' + new Date());
      console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);

      var s3 = new AWS.S3();
      var uploadParams = {Bucket: 'sswa', Key: filename, Body: file};
      // partSize/queueSize are managed-upload options: they must be given as
      // the second argument of upload(). An `options` key on the S3
      // constructor is not read by the uploader, so the defaults would apply.
      var uploadOptions = {partSize: 5 * 1024 * 1024, queueSize: 10};   // 5 MB
      s3.upload(uploadParams, uploadOptions).on('httpUploadProgress', function (evt) {
         // Emitted as parts are sent; useful to confirm parts go out while
         // the request is still being received.
         console.log(evt);
      }).send(function (err, data) {
         console.log('After Upload: ' + new Date());
         console.log(err, data);
      });
   });
   busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated, encoding, mimetype) {
      console.log('Field [' + fieldname + ']: value: ' + inspect(val));
   });
   busboy.on('finish', function() {
      console.log('Done parsing form!');
      res.writeHead(303, { Connection: 'close', Location: '/' });
      res.end();
   });
   req.pipe(busboy);
});

I am not sure whether this is really uploading to S3 simultaneously as a stream. Are there any drawbacks to this approach?


Solution

  • To test whether the multipart streaming upload to S3 is working, I logged the time at three points of execution:

    1. Before start upload from client (uploadStartTime)
    2. After uploaded to EC2 (busboyFinishTime)
    3. After transferred to S3 (s3UploadFinishTime)

    Then I ran it from EC2. After uploading video files of various sizes (36.1 MB, 33.3 MB, 52.5 MB), I observed that parts are transferred to S3 immediately for each 5 MB (as I defined) uploaded to EC2. While parts are being uploaded to S3, you will see log output from the following line. It shows the file part upload progress along with the part number.

    console.log(evt);
    

    For all three uploads, busboyFinishTime and s3UploadFinishTime are the same, or differ by barely one second.

    Example: when a 52.5 MB file was uploaded

    {
      "uploadStartTime": "2016-04-28T14:19:51.365Z",
      "busboyFinishTime": "2016-04-28T14:22:26.292Z",
      "s3UploadFinishTime": "2016-04-28T14:22:26.558Z"
    }
    

    Full code:

    /**
     * POST /s3StreamUpload (timing-instrumented)
     *
     * Streams every uploaded file part from Busboy into an S3 managed upload
     * and replies with three timestamps:
     *   - uploadStartTime:    when the handler started processing the request
     *   - busboyFinishTime:   when Busboy finished parsing the form
     *   - s3UploadFinishTime: when the last S3 upload completed (null when the
     *                         form contained no file part)
     *
     * The reply is sent exactly once, after the form has been fully parsed and
     * no S3 upload is still running. If any S3 upload fails, the error is
     * forwarded to next() instead of returning the timing JSON.
     */
    router.post('/s3StreamUpload', function(req, res, next) {
       var busboy = new Busboy({headers: req.headers});
       var uploadStartTime = new Date(),
          busboyFinishTime = null,
          s3UploadFinishTime = null,
          pendingUploads = 0,      // S3 uploads started but not yet completed
          uploadError = null,      // first S3 failure, if any
          responded = false;       // guards against replying twice

       // Single exit point shared by the 'finish' handler and the S3 callbacks,
       // whichever happens last.
       function respondWhenDone() {
          if (responded || !busboyFinishTime || pendingUploads > 0) {
             return;
          }
          responded = true;
          if (uploadError) {
             next(uploadError);
             return;
          }
          res.json({
             uploadStartTime: uploadStartTime,
             busboyFinishTime: busboyFinishTime,
             s3UploadFinishTime: s3UploadFinishTime
          });
       }

       busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
          console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);

          var s3 = new AWS.S3();
          var uploadParams = {Bucket: 'sswa', Key: filename, Body: file};
          // partSize/queueSize are managed-upload options: they must be given
          // as the second argument of upload(). An `options` key on the S3
          // constructor is not read by the uploader.
          var uploadOptions = {partSize: 5 * 1024 * 1024, queueSize: 10};   // 5 MB

          pendingUploads += 1;
          s3.upload(uploadParams, uploadOptions).on('httpUploadProgress', function (evt) {
             console.log(evt);
          }).send(function (err, data) {
             pendingUploads -= 1;
             s3UploadFinishTime = new Date();
             console.log(err, data);
             if (err) {
                uploadError = uploadError || err;
                // Discard whatever is left of this part so Busboy can still
                // reach 'finish' and the request does not hang.
                file.resume();
             }
             respondWhenDone();
          });
       });
       busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated, encoding, mimetype) {
          console.log('Field [' + fieldname + ']: value: ' + inspect(val));
       });
       busboy.on('finish', function() {
          console.log('Done parsing form!');
          busboyFinishTime = new Date();
          respondWhenDone();
       });
       req.pipe(busboy);
    });
    

    According to my observations, I feel confident that this is one of the best solutions to upload a file to S3 via EC2 using a REST API deployed on EC2.