I have a scenario in which I need to automate checking the contents of a PDF file. How can I retrieve the content of a PDF file in Node.js?
I am completely blocked on this. There are a few posts about pdf2json
and jsonreader,
but those approaches are not working for me. Any help would be appreciated.
// Question snippet: reads the PDF at pdfFilePath into a buffer and passes that buffer
// to pdf2json's parseBuffer(). This is the code that produces the error quoted below.
// NOTE(review): fs, PDFParser and pdfFilePath are not defined in this snippet —
// presumably they are declared elsewhere; confirm.
// NOTE(review): no "pdfParser_dataReady" / "pdfParser_dataError" listeners are
// registered here, so even a successful parse would have nowhere to deliver its result.
var pdfParser = new PDFParser();
// NOTE(review): two functions are passed after the path. fs.readFile expects a single
// completion callback (optionally preceded by an options argument), so one of these
// functions is not being used the way the author intended — verify against the Node.js docs.
fs.readFile(pdfFilePath, function(err, pdfBuffer) {
pdfParser.parseBuffer(pdfBuffer);
}, function(pdfBuffer){
// NOTE(review): Node-style callbacks receive the error as their first argument, so if
// this function is the one invoked, `pdfBuffer` here is likely the error slot (null on
// success) rather than the file data — a plausible cause of the
// "need either .data or .url" error. Confirm.
pdfParser.parseBuffer(pdfBuffer);
})
Error: Invalid parameter array, need either .data or .url at FSReqWrap.readFileAfterClose [as oncomplete] (fs.js:445:3)
I found the answer, and it is working perfectly. Install fs and pdf2json by running the commands below.
npm install pdf2json
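and npm install fs (not strictly required: fs is a built-in Node.js core module and is available without installation)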
// Verifies that a PDF in the current user's Downloads folder contains an expected text.
//
// Expects two names to be supplied by the surrounding test code (not defined here):
//   browser      - the test-runner object exposing assert.ok(); also passed to PDFParser
//                  as its first constructor argument.
//   textToVerify - the string that must appear in the PDF's raw text.
var fs = require('fs');
var os = require('os');
var path = require('path');
var PDFParser = require('pdf2json');

// Build the path with the standard library. The previous version called an undefined
// osHomedir() and then read `.sep` from the resulting string (always undefined), so the
// intended separator normalisation never took place. path.join() yields a path that is
// valid for the current platform without any manual separator handling.
var pdfFilePath = path.join(os.homedir(), 'Downloads', 'filename.pdf');

if (fs.existsSync(pdfFilePath)) {
  // Read the content of the pdf from the downloaded path.
  // NOTE(review): per the pdf2json README, the second constructor argument (1) enables
  // raw-text collection so that getRawTextContent() has content to return — confirm
  // against the installed pdf2json version.
  var pdfParser = new PDFParser(browser, 1);

  pdfParser.on('pdfParser_dataError', function (errData) {
    // Parsing failures are only logged; no assertion is recorded in this case.
    console.error(errData.parserError);
  });

  pdfParser.on('pdfParser_dataReady', function (pdfData) {
    // The parsed text is read from the parser instance, not from the pdfData payload.
    browser.assert.ok(pdfParser.getRawTextContent().indexOf(textToVerify) > -1);
  });

  // Starts the parse; the outcome is delivered through the two event handlers above.
  pdfParser.loadPDF(pdfFilePath);
} else {
  console.log('OOPs file not present in the downloaded folder');
  // Record a failing assertion if the file is not found in the path mentioned.
  browser.assert.ok(fs.existsSync(pdfFilePath));
}