I am trying to convert this document (http://www.redbooks.ibm.com/redbooks/pdfs/ga195486.pdf) to answer units in Watson's Document Conversion service using the watson-developer-cloud node.js library.
In the actual program (not this test program), I am retrieving the document and converting it on-the-fly, without writing it to disk first. I have done this before with other documents, but the latest version of the library (v 1.7.0) seems to have changed and it no longer works the way I was using it. But even before I started using the latest version, this particular document would not convert.
The annotated test code that I am using is below. I have tried several ways to get this to work, the variations of which are all commented out under var opts={ below. You have to uncomment one of them at a time to see the results.
'use strict';
var bluemix = require('./bluemix');
var extend=require('util')._extend;
var fs=require('fs');
var watson=require('watson-developer-cloud');
var streams = require('memory-streams');
var dcCredentials = extend({
url: '<url>',
version: 'v1',
username: '<username>',
password: '<password>'
}, bluemix.getServiceCreds('document_conversion')); // VCAP_SERVICES
var document_conversion = watson.document_conversion(dcCredentials);
var bookpdf=getBook('ga195486.pdf');
convert(bookpdf);
function getBook(filename)
{
var bl=fs.readFileSync(filename,'utf8');
return bl;
}
function convert(content)
{
var opts={ //uncomment ONE of these
// file: new Buffer(content), //See message #1 below
// file: {value: new Buffer(content), options: {}}, //see message #2 below
// file: {value: new Buffer(content), options: {contentType: "application/pdf"}}, //This used to work. See message #2 (again) below
// file: new streams.ReadableStream(content),//see message #3 below
conversion_target: "ANSWER_UNITS",
content_type:'application/pdf'
};
document_conversion.convert(opts,
function (err, response)
{
if (err)
{
console.log("Error converting doc: ", err);
}
else if (response.answer_units.length==0)
{
var msg="No answer units";
console.log(msg,response);
}
else
{
console.log('Works!');
console.dir(response);
}
}
);
}
//Message #1: This returns:
// No answer units { source_document_id: '',
// timestamp: '2016-05-23T16:18:23.825Z',
// media_type_detected: 'application/pdf',
// metadata: [],
// answer_units: [],
// warnings:
// [ { phase: 'pdf',
// warning_id: 'empty_input_to_converter',
// description: 'The input provided to the converter phase is empty or doesn\'t contain text that can be converted.' },
// { phase: 'normalized_html',
// warning_id: 'empty_input_to_converter',
// description: 'The input HTML document has no body content.' },
// { phase: 'answer_units',
// warning_id: 'empty_input_to_converter',
// description: 'The input provided to the converter phase is empty or doesn\'t contain text that can be converted.' } ] }
//Message #2: These return:
///home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/node_modules/combined-stream/node_modules/delayed-stream/lib/delayed_stream.js:33
// source.on('error', function() {});
//
//TypeError: source.on is not a function
// at Function.DelayedStream.create (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/node_modules/combined-stream/node_modules/delayed-stream/lib/delayed_stream.js:33:10)
// at FormData.CombinedStream.append (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/node_modules/combined-stream/lib/combined_stream.js:43:37)
// at FormData.append (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/node_modules/form-data/lib/form_data.js:68:3)
// at appendFormValue (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/request.js:339:21)
// at Request.init (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/request.js:352:11)
// at new Request (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/request.js:142:8)
// at request (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/node_modules/request/index.js:55:10)
// at createRequest (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/lib/requestwrapper.js:134:10)
// at DocumentConversion.convert (/home/david/git/ccb-contentbridge/node_modules/watson-developer-cloud/services/document_conversion/v1.js:134:10)
// at convert (/home/david/git/ccb-contentbridge/testRedbooks.js:35:24)
//Message #3: This returns and then it hangs there:
//Error converting doc: { code: 400, error: 'Error in the web application' }
Can someone please tell me what I am doing wrong?
That particular file is larger than what the Document Conversion service can currently handle. Unfortunately I don't have very good info on exactly what the limits are right now, but the team is aware of this and looking into making improvements.
If you can provide an example that worked previously but broke with the v1.7.0 of the node.js library, I'll take a look at that and hopefully be able to provide better info.
Oh, and specifying 'utf8'
on your fs.readfileSync()
call may be causing some of the trouble you're experiencing.