javascript, node.js, mysql2, exceljs, node.js-fs

NodeJS: writing large data to an Excel file


I have a problem writing large data to an Excel file in a way that conserves server memory. I read more than 1 million rows from a SQL database into the server, which takes a lot of time and memory, and then writing them to an Excel file consumes a lot of server memory as well. What should I do? How can I optimize this? Is there a better way?

I tried the following code:

const express = require('express');
const ExcelJS = require('exceljs');
const fs = require('fs');
const app = express();
var db = require.main.require('./src/app/models/db_controller');

app.get('/download-excel', (req, res) => {
  const workbook = new ExcelJS.Workbook();
  const worksheet = workbook.addWorksheet('Sheet1');
  worksheet.columns = [
    { header: 'Header 1', key: 'date_time' },
    { header: 'Header 2', key: 'shaft_seal_pressure' },
    { header: 'Header 3', key: 'transfer_pressure' },
    { header: 'Header 4', key: 'cip_tem' },
    { header: 'Header 5', key: 'elect_usage' },
  ];
  db.read_data_all('master_data', (err, result) => {
    if (err) {
      console.log(err);
      res.status(500).send('Failed to read data from the database.');
    } else {
      worksheet.addRows(result);
      const tempFilePath = 'temp.xlsx';
      workbook.xlsx.writeFile(tempFilePath)
        .then(() => {
          res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
          res.setHeader('Content-Disposition', 'attachment; filename=excel_template.xlsx');
          const fileStream = fs.createReadStream(tempFilePath);
          fileStream.pipe(res);
          fileStream.on('end', () => {
            fs.unlink(tempFilePath, (err) => {
              if (err) {
                console.error('Error deleting temporary file:', err);
              }
            });
          });
        })
        .catch((error) => {
          console.error('Error:', error);
          res.status(500).send('An error occurred while generating the Excel file.');
        });
    }
  });
});

app.listen(5000, () => {
  console.log('Server is running on port 5000');
});

Result

    <--- Last few GCs --->
    
    [14120:000001B1C00BAA90]    75907 ms: Mark-sweep (reduce) 2046.9 (2082.7) -> 2046.5 (2083.5) MB, 2902.5 / 0.0 ms  (average mu = 0.172, current mu = 0.001) allocation failure; scavenge might not succeed
    
    
    <--- JS stacktrace --->
    
    FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory
     1: 00007FF79DF307BF node_api_throw_syntax_error+175823
     2: 00007FF79DEB5796 DSA_meth_get_flags+59654
     3: 00007FF79DEB7480 DSA_meth_get_flags+67056
     4: 00007FF79E95DCC4 v8::Isolate::ReportExternalAllocationLimitReached+116
     5: 00007FF79E949052 v8::Isolate::Exit+674
     6: 00007FF79E7CAF0C v8::internal::EmbedderStackStateScope::ExplicitScopeForTesting+124
     7: 00007FF79E7C812B v8::internal::Heap::CollectGarbage+3963
     8: 00007FF79E7DE363 v8::internal::HeapAllocator::AllocateRawWithLightRetrySlowPath+2099
     9: 00007FF79E7DEC0D v8::internal::HeapAllocator::AllocateRawWithRetryOrFailSlowPath+93
    10: 00007FF79E7EE3D0 v8::internal::Factory::NewFillerObject+816
    11: 00007FF79E4DF315 v8::internal::DateCache::Weekday+1349
    12: 00007FF79E9FB1F1 v8::internal::SetupIsolateDelegate::SetupHeap+558193
    13: 00007FF79E980D02 v8::internal::SetupIsolateDelegate::SetupHeap+57218
    14: 00007FF71EC10EC4

Solution

  • It looks like you read the WHOLE dataset from the DB into memory and then, in promise style, create a temp file and send it back. I highly recommend reading the data from the database in a stream, row by row (or, for a complex query, in chunks of data), and then writing it through another write stream to the Excel file.

    For example, without going into technical details:

    const dbStream = this.sqlClient.query(`SELECT * FROM sometable`).stream();
    
    const someChangesStream = new CustomDataTransform();
    const excelStream = new XLSXTransformStream();
    
    const pipe = dbStream.pipe(someChangesStream).pipe(excelStream)
    
    return pipe;
    

    You can use libraries (like exceljs) together with a custom Transform stream. A more concrete sketch follows below.
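
    Here is a minimal sketch of that approach, assuming mysql2 (the db_controller module from the question isn't shown, so the connection settings below are hypothetical). Instead of a temp file, it uses ExcelJS's streaming WorkbookWriter, which flushes each committed row straight into the HTTP response, while mysql2's query stream yields rows one at a time:

    const express = require('express');
    const ExcelJS = require('exceljs');
    const mysql = require('mysql2');
    const app = express();

    // Hypothetical connection settings -- adjust to your environment.
    const connection = mysql.createConnection({
      host: 'localhost',
      user: 'user',
      password: 'password',
      database: 'mydb',
    });

    app.get('/download-excel', async (req, res) => {
      res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
      res.setHeader('Content-Disposition', 'attachment; filename=excel_template.xlsx');

      // Streaming workbook writer: committed rows are flushed to `res`
      // as they arrive, so the dataset never sits in memory all at once.
      const workbook = new ExcelJS.stream.xlsx.WorkbookWriter({
        stream: res,
        useStyles: false,
        useSharedStrings: false,
      });
      const worksheet = workbook.addWorksheet('Sheet1');
      worksheet.columns = [
        { header: 'Header 1', key: 'date_time' },
        { header: 'Header 2', key: 'shaft_seal_pressure' },
        { header: 'Header 3', key: 'transfer_pressure' },
        { header: 'Header 4', key: 'cip_tem' },
        { header: 'Header 5', key: 'elect_usage' },
      ];

      try {
        // mysql2 exposes a query as a Readable object stream; async
        // iteration pulls rows one at a time with backpressure.
        const rows = connection.query('SELECT * FROM master_data').stream();
        for await (const row of rows) {
          worksheet.addRow(row).commit(); // flush this row immediately
        }
        worksheet.commit();
        await workbook.commit(); // finalizes the xlsx and ends the response
      } catch (err) {
        console.error('Error while streaming the Excel file:', err);
        if (!res.headersSent) {
          res.status(500).send('An error occurred while generating the Excel file.');
        } else {
          res.destroy(err); // abort the partially sent download
        }
      }
    });

    app.listen(5000, () => {
      console.log('Server is running on port 5000');
    });

    With this setup, memory usage stays roughly constant regardless of row count, since only one row (plus ExcelJS's internal buffers) is held at a time; useStyles: false and useSharedStrings: false keep the writer from accumulating per-row state.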