I'm trying to be able to generate 1k reports using and endpoint with Express.js, I pass an array in a JSON, the API goes over it and in a forEach loop, then every object is used to scrape a portal, get the response, and create a PDF file...
This approach pseudo work, but I'm pretty sure that there are some concurrency problems... because, if I pass 2 items in the JSON array the API can create the 2 PDF files without a problem, but if I pass 300 the API creates randomly 50... or 60 or 120.
This is my jsreport config
const jsReportConfig = {
extensions: {
"chrome-pdf": {
launchOptions: {
timeout: 10000,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
},
},
},
tempDirectory: path.resolve(__dirname, './../../temporal/pdfs'),
templatingEngines: {
numberOfWorkers: 4,
timeout: 180000,
strategy: 'http-server',
},
};
I setup the jsreport instance like this
jsreport.use(jsReportChrome());
jsreport.use(jsReportHandlebars());
jsreport.init()
And, this is how I render the reports, the checkInvoiceStatus
function is used as an HTTP call that returns an HTML response that is injected in the Handlebars template.
const renderReports = (reporter, invoices) => new Promise(async (resolve, reject) => {
try {
const templateContent = await readFile(
path.resolve(__dirname, './../templates/hello-world.hbs'),
'utf-8',
);
invoices.forEach(async (invoice) => {
try {
const response = await checkInvoiceStatus(invoice.re, invoice.rr, invoice.id)
const $ = await cheerio.load(response);
const reporterResponse = await reporter.render({
template: {
content: templateContent,
engine: 'handlebars',
recipe: 'chrome-pdf',
name: 'PDF Validation',
chrome: {
displayHeaderFooter: true,
footerTemplate: '<table width=\'100%\' style="font-size: 12px;"><tr><td width=\'33.33%\'>{#pageNum} de {#numPages}</td><td width=\'33.33%\' align=\'center\'></td><td width=\'33.33%\' align=\'right\'></td></tr></table>',
},
},
data: {
taxpayerId: 'CAC070508MY2',
captcha: $('#ctl00_MainContent_ImgCaptcha').attr('src'),
bodyContent: $('#ctl00_MainContent_PnlResultados').html(),
},
});
reporterResponse.result.pipe(fs.createWriteStream(`./temporal/validatedPdfs/${invoice.id}.pdf`));
} catch (err) {
console.error(err);
reject(new Error(JSON.stringify({
code: 'PORTAL-PDFx001',
message: 'The server could not retrieve the PDF from the portal',
})));
}
});
resolve();
} catch (err) {
console.error(err);
reject(new Error(JSON.stringify({
code: 'PORTAL-PDFx001',
message: 'The server could not retrieve the PDF from the portal',
})));
}
});
I don't know why, but this function is terminated in 500ms, but the files are been created after 1 minute...
app.post('/pdf-report', async (req, res, next) => {
const { invoices } = req.body;
repository.renderReports(reporter, invoices)
.then(() => res.status(200).send('Ok'))
.catch((err) => {
res.status(500).send(err);
});
});
UPDATE
Alongside the code presented by @hurricane, I had to change the jsReport config to this
const jsReportConfig = {
chrome: {
timeout: 180000,
strategy: 'chrome-pool',
numberOfWorkers: 4
},
extensions: {
'chrome-pdf': {
launchOptions: {
timeout: 180000,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
ignoreDefaultArgs: ['--disable-extensions'],
},
},
},
tempDirectory: path.resolve(__dirname, './../../temporal/pdfs'),
templatingEngines: {
numberOfWorkers: 4,
timeout: 180000,
strategy: 'http-server',
},
};
I think in your structure it is easy to solve your problem with writeFileSync
function instead of using writeStream. But it does not mean it is the best approach. Because you have to wait for each render and each write file process to start another one. So i would suggest to use Promise.all
that you can run your long processes at the same time. That means you will wait only for longest process.
Quick win - Slow process
reporterResponse.result.pipe
Change this line with
fs.createFileSync(`./temporal/validatedPdfs/${invoice.id}.pdf`);
Better Approach - Fast process
const renderReports = (reporter, invoices) => new Promise(async (resolve, reject) => {
try {
const templateContent = await readFile(
path.resolve(__dirname, './../templates/hello-world.hbs'),
'utf-8',
);
const renderPromises = invoices.map((invoice) => {
const response = await checkInvoiceStatus(invoice.re, invoice.rr, invoice.id)
const $ = await cheerio.load(response);
return await reporter.render({
template: {
content: templateContent,
engine: 'handlebars',
recipe: 'chrome-pdf',
name: 'PDF Validation',
chrome: {
displayHeaderFooter: true,
footerTemplate: '<table width=\'100%\' style="font-size: 12px;"><tr><td width=\'33.33%\'>{#pageNum} de {#numPages}</td><td width=\'33.33%\' align=\'center\'></td><td width=\'33.33%\' align=\'right\'></td></tr></table>',
},
},
data: {
taxpayerId: 'CAC070508MY2',
captcha: $('#ctl00_MainContent_ImgCaptcha').attr('src'),
bodyContent: $('#ctl00_MainContent_PnlResultados').html(),
},
});
});
const renderResults = await Promise.all(renderPromises);
const filewritePromises = results.map(renderedResult => await fs.writeFile(`./temporal/validatedPdfs/${invoice.id}.pdf`, renderedResult.content));
const writeResults = await Promise.all(filewritePromises);
resolve(writeResults);
} catch (err) {
console.error(err);
reject(new Error(JSON.stringify({
code: 'PORTAL-PDFx001',
message: 'The server could not retrieve the PDF from the portal',
})));
}
});