I am using the npm xlsx (lib/parserScripts/readExcel.js) and threads module to read a large excel file.
This works fine for the first time but if I simultaneously upload another large file then I get an error
Error: channel closed
at ChildProcess.target.send (internal/child_process.js:554:16)
at Worker.send (/app/node_modules/threads/lib/worker.node/worker.js:108:16)...
This is maybe due to the previous threads are still processing /have not been killed hence when a new pool is made for another request the previous threads are still busy processing.
How to solve this? Do I have to manually terminate the threads in the below piece of code? If so then how?
index.js
parseFile: ['fileHeaders', (results, cb) => {
const excelParserScript = __dirname + '/../lib/parserScripts/readExcel';
const worksheetIndex = 3;
const params = {
file.path,
worksheetIndex
}
// using worker process
// result will be of the type {error: false, message: '', data: {}}
lib.miniWorker.bufferedJob(excelParserScript, params, (err, result) => {
lib/miniworker.js
const Threads = require('threads');
const Pool = Threads.Pool;
const workerPool = new Pool();
module.exports = class JobManager {
static bufferedJob(pathToScript, params, callback){
workerPool
.run(pathToScript)
.send(params)
.on('done', (result, input) => {
console.log(`Worker Job done: ${pathToScript} `);
callback(null, result);
})
.on('error', (job, error) => {
console.log(`Error in executing Worker Job: ${pathToScript}`);
callback(job || error);
})
}
}
lib/parserScripts/readExcel.js
module.exports = function(input, done) {
const XLSX = require('xlsx');
let workbook;
const path = input.path;
const worksheetIndex = input.worksheetIndex;
const expectedHeaders = input.expectedHeaders || [];
const options = {};
if (expectedHeaders.length > 0) {
options.header = expectedHeaders;
}
const response = {
error: false,
message: '',
data: {}
}
try {
workbook = XLSX.readFile(path, {});
const sheet = workbook['Sheets'][workbook.SheetNames[worksheetIndex]];
const headers = getHeaders(sheet);
const fileData = XLSX.utils.sheet_to_json(workbook['Sheets'][workbook.SheetNames[worksheetIndex]], options);
response.data = fileData;
response.headers = headers;
return done(response)
} catch (err) {
response.error = true;
response.messsage = 'Error in reading the file';
return done(response);
}
function getHeaders(sheet) {
var header = 0, offset = 1;
var hdr = [];
var o = {};
if (sheet == null || sheet["!ref"] == null) return [];
var range = o.range !== undefined ? o.range : sheet["!ref"];
var r;
if (o.header === 1) header = 1;
else if (o.header === "A") header = 2;
else if (Array.isArray(o.header)) header = 3;
switch (typeof range) {
case 'string':
r = safe_decode_range(range);
break;
case 'number':
r = safe_decode_range(sheet["!ref"]);
r.s.r = range;
break;
default:
r = range;
}
if (header > 0) offset = 0;
var rr = XLSX.utils.encode_row(r.s.r);
var cols = new Array(r.e.c - r.s.c + 1);
for (var C = r.s.c; C <= r.e.c; ++C) {
cols[C] = XLSX.utils.encode_col(C);
var val = sheet[cols[C] + rr];
switch (header) {
case 1:
hdr.push(C);
break;
case 2:
hdr.push(cols[C]);
break;
case 3:
hdr.push(o.header[C - r.s.c]);
break;
default:
if (val === undefined) continue;
hdr.push(XLSX.utils.format_cell(val));
}
}
return hdr;
}
function safe_decode_range(range) {
var o = {s: {c: 0, r: 0}, e: {c: 0, r: 0}};
var idx = 0, i = 0, cc = 0;
var len = range.length;
for (idx = 0; i < len; ++i) {
if ((cc = range.charCodeAt(i) - 64) < 1 || cc > 26) break;
idx = 26 * idx + cc;
}
o.s.c = --idx;
for (idx = 0; i < len; ++i) {
if ((cc = range.charCodeAt(i) - 48) < 0 || cc > 9) break;
idx = 10 * idx + cc;
}
o.s.r = --idx;
if (i === len || range.charCodeAt(++i) === 58) {
o.e.c = o.s.c;
o.e.r = o.s.r;
return o;
}
for (idx = 0; i != len; ++i) {
if ((cc = range.charCodeAt(i) - 64) < 1 || cc > 26) break;
idx = 26 * idx + cc;
}
o.e.c = --idx;
for (idx = 0; i != len; ++i) {
if ((cc = range.charCodeAt(i) - 48) < 0 || cc > 9) break;
idx = 10 * idx + cc;
}
o.e.r = --idx;
return o;
}
}
The issue is because of the older version module of threads. Updating to the new version and using the updated API which is not event-based can solve the purpose.
https://github.com/andywer/threads.js/issues/164
However, if you want to correct the event-based code(from older version) this is what you need to do (kill the threads after the event gets completed).
const Threads = require('threads');
const Pool = Threads.Pool;
module.exports = class JobManager {
static bufferedJob(pathToScript, params, callback){
let workerPool = new Pool();
workerPool
.run(pathToScript)
.send(params)
.on('done', (result, input) => {
console.log(`Worker Job done: ${pathToScript} `);
callback(null, result);
workerPool.killAll();
workerPool = null ;
})
.on('error', (job, error) => {
console.log(`Error in executing Worker Job: ${pathToScript}`);
callback(job || error);
workerPool.killAll();
workerPool = null ;
}).on('abort', (job, error)=>{
console.log(`Abort Worker Job: ${pathToScript}, Error : ${error}`);
callback(job || error);
workerPool.killAll();
workerPool = null ;
}).on('finished', ()=>{
console.log('Everything done, shutting down the thread pool.');
workerPool.killAll();
});
}
}