Since IBM changed the authentication method for Watson, we have been trying to implement it in our code, but we are not able to receive any data from the TTS service using their SDK or raw WebSockets.
The only thing that works is the HTTP API, which returns a response like this. It is not valid JSON, nor is it a buffer.
We have already opened an issue on the Node.js SDK, but we want to move forward with the HTTP API for now.
Here is how to get a similar response:
let requestPromise = require('request-promise-native');
let fs = require('fs');

// Step 1: exchange the API key for an IAM access token
let postData = {
  "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
  "apikey": "<api_key>"
};

let opts = {
  uri: "https://iam.bluemix.net/identity/token",
  headers: {
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "application/json"
  },
  method: "POST",
  form: postData
};

requestPromise(opts).then((body) => {
  let token = JSON.parse(body).access_token;

  // Step 2: call the synthesize endpoint with the bearer token
  let postData = {
    "text": 'Hello world',
    "accept": 'audio/mp3',
    "voice": 'en-US_AllisonVoice'
  };

  let opts = {
    uri: "https://gateway-syd.watsonplatform.net/text-to-speech/api/v1/synthesize",
    headers: {
      "Content-Type": "application/json",
      "Accept": "application/json",
      // "Accept": "audio/mp3",
      "Content-Length": Buffer.byteLength(JSON.stringify(postData)),
      "Authorization": "Bearer " + token
    },
    method: "POST",
    json: postData
  };

  requestPromise(opts).then((body) => {
    // Decode the response as base64 and write it to an mp3 file
    let chunkStream = fs.createWriteStream('./audio.mp3');
    let buf = Buffer.from(body, 'base64');
    chunkStream.write(buf);
  }).catch((err) => {
    if (err) throw err;
  });
}).catch((err) => {
  if (err) throw err;
});
We don't know how to deal with that response. Saving it as a base64-decoded buffer to an .mp3 produces a corrupted audio file, and the same happens if you save the response directly to a file or change the Accept header to audio/mp3. We even ran the audio files through mp3val, which fixes a lot of similar problems, but that didn't work either.
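For reference, the direct-save variant we tried looked roughly like this; it is just a sketch that assumes it sits inside the same .then callback as the snippet above, so token, postData, requestPromise and fs are already in scope:

// Sketch of the "save the response directly" attempt described above.
// Assumes the same token/postData/requestPromise setup as the snippet above.
let directOpts = {
  uri: "https://gateway-syd.watsonplatform.net/text-to-speech/api/v1/synthesize",
  headers: {
    "Content-Type": "application/json",
    "Accept": "audio/mp3",              // ask for audio directly instead of JSON
    "Authorization": "Bearer " + token
  },
  method: "POST",
  json: postData
};

requestPromise(directOpts).then((body) => {
  // Write whatever comes back straight to disk, with no base64 decoding;
  // the resulting file is still not playable for us.
  fs.writeFileSync('./audio-direct.mp3', body);
}).catch((err) => {
  if (err) throw err;
});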
You can get the result you want using the official Node.js SDK:
npm install --save watson-developer-cloud
and then:
var TextToSpeechV1 = require('watson-developer-cloud/text-to-speech/v1');
var fs = require('fs');

// The SDK handles the IAM token exchange for you
var textToSpeech = new TextToSpeechV1({
  iam_apikey: 'API_KEY',
  url: 'https://gateway-syd.watsonplatform.net/text-to-speech/api/'
});

var synthesizeParams = {
  text: 'How are you doing?',
  accept: 'audio/wav',
  voice: 'en-US_AllisonVoice'
};

textToSpeech.synthesize(synthesizeParams, function (err, audio) {
  if (err) {
    // do something
    console.log('failure');
    return;
  }
  // `audio` can be written to disk as-is
  fs.writeFileSync('result-audio.wav', audio);
  console.log('success');
});
Note that wrapping the call in TextToSpeechV1
changes the URL you configure: you pass only the service base URL, since /v1/synthesize is appended implicitly.
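If you want the mp3 output from your original question, the same callback pattern should work with your parameters swapped in; this is an untested sketch that reuses the textToSpeech instance and fs from the snippet above:

// Same call as above, using the format and voice from the question
var mp3Params = {
  text: 'Hello world',
  accept: 'audio/mp3',
  voice: 'en-US_AllisonVoice'
};

textToSpeech.synthesize(mp3Params, function (err, audio) {
  if (err) {
    console.log('failure');
    return;
  }
  fs.writeFileSync('result-audio.mp3', audio);
  console.log('success');
});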