Npm package csvtojson Package Link
csvtojson({
"delimiter": ";",
"fork": true
})
.fromStream(fileReadStream)
.subscribe((dataObj) => {
console.log(dataObj);
}, (err) => {
console.error(err);
}, (success) => {
console.log(success);
});
While trying to handle large CSV file (about 1.3 million records) I face error "CSV Parse Error: Error: unclosed_quote." after certain records(e.g. after 400+ records) being processed successfully. From the CSV file i don't see any problems with data formatting there, however the parser might be raising this error because of "\n" character being found inside the column/field value.
Any help will be much appreciated.
I've played about with this, and it's possible to hook into this using a CSV File Line Hook, csv-file-line-hook, you can check for invalid lines and either repair or simply invalidate them.
The example below will simply skip the invalid lines (missing end quotes)
example.js
const fs = require("fs");
let fileReadStream = fs.createReadStream("test.csv");
let invalidLineCount = 0;
const csvtojson = require("csvtojson");
csvtojson({ "delimiter": ";", "fork": true })
.preFileLine((fileLineString, lineIdx)=> {
let invalidLinePattern = /^['"].*[^"'];/;
if (invalidLinePattern.test(fileLineString)) {
console.log(`Line #${lineIdx + 1} is invalid, skipping:`, fileLineString);
fileLineString = "";
invalidLineCount++;
}
return fileLineString
})
.fromStream(fileReadStream)
.subscribe((dataObj) => {
console.log(dataObj);
},
(err) => {
console.error("Error:", err);
},
(success) => {
console.log("Skipped lines:", invalidLineCount);
console.log("Success");
});
test.csv
Name;Age;Profession
Bob;34;"Sales,Marketing"
Sarah;31;"Software Engineer"
James;45;Driver
"Billy, ;35;Manager
"Timothy;23;"QA