I am trying to stream a file to S3 without storing the file to disk/ssd. I would like to have part of the hash of the file as a part of the filename when uploading to S3.
EDIT_v1:
Been trying to follow this post using busboy
as the parser: Calculate a file hash and save the file. I took an example from the busboy docs and adabpted it with an answer from the post:
const server = http.createServer();
server.on('request', async (req, res) => {
if (req.method === 'POST') {
const bb = busboy({ headers: req.headers });
bb.on('file', (name, file, info) => {
const { filename, encoding, mimeType } = info;
console.log(
`File [${name}]: filename: %j, encoding: %j, mimeType: %j`,
filename,
encoding,
mimeType
);
const fileHashSource = new PassThrough();
const writeSource = new PassThrough();
file.pipe(fileHashSource);
file.pipe(writeSource);
fileHashSource.resume();
writeSource.resume();
createFileHash(fileHashSource, (err, hash) => {
if (err) {
console.log('err', err)
return res.end('some err');
}
const writeStream = fs.createWriteStream(`test_${hash.slice(0, 8)}.png`);
writeStream.on('error', function(err) {
console.log('write error', err);
return res.end('write error')
});
writeStream.on('finish', function() {
console.log('write finished')
return res.end('done')
});
writeSource.pipe(writeStream);
});
});
bb.on('field', (name, val, info) => {
console.log(`Field [${name}]: value: %j`, val);
});
bb.on('close', () => {
console.log('Done parsing form!');
req.unpipe(bb);
res.writeHead(201, { Connection: 'close' });
res.end('done!');
});
req.pipe(bb);
} else if (req.method === 'GET') {
res.writeHead(200, { Connection: 'close' });
res.end(`
<body style="background-color: black">
<form enctype="multipart/form-data" method="post">
<label>file name
<input type="text" name="textfield" />
</label><br />
<label>single file
<input type="file" name="filefield" />
</label><br />
<br />
<button type="submit">Upload</button>
</form>
</body>
`);
}
})
server.listen(3000, () => {
console.info(`NodeJS process: ${process.pid}`)
console.info(`Listening on port: 3000`)
});
function createFileHash(readStream, next) {
const hash = crypto.createHash('sha1');
hash.setEncoding('hex');
hash.on('error', function(err) {
console.log('hash error')
return next(err);
});
hash.on('finish', function(data) {
console.log('hash finished');
return next(null, hash.read());
});
readStream.pipe(hash);
}
EDIT_v2:
see first answer below for a solution
I put the task flow in a pipeline
, implemented late piping with PassThrough
, and finally used a function that returns an async generator that uploads to S3
const { fileStream, mimeType } = createFromBusBoy();
const s3Source = new PassThrough();
filestream.on('data', chunk => {
s3Source.write(chunk);
});
filestream.on('end', () => {
s3Source.end();
});
const hash = createHash('sha256');
hash.setEncoding('hex');
try {
await pipeline(
filestream,
hash,
uploadImage(s3Source, mimeType),
);
} catch (err) {
console.log(err)
throw err;
}
function uploadImage(fileStream, mimeType) {
return async function* (source, signal) {
let hash;
for await (const chunk of source) {
hash = chunk;
}
yield await uploadToS3(filestream, hash, mimeType);
};
}