javascript, node.js, text-to-speech, ibm-watson, watson-text-to-speech

Parsing Watson TTS HTTP response in nodejs


Since IBM changed the authentication method for Watson, we were trying to implement that in our code, but we are not able to receive any data from the TTS service using their SDK, or raw websockets.

The only thing that works is the HTTP API, which returns a response like this. It is not valid JSON, and it is not a buffer either.

We already opened an issue in the nodejs SDK, but we want to move with the HTTP API for now.

Here is how to get a similar response:

// Reproduction script for the problem described in the question:
//   1. exchange an IBM Cloud API key for an IAM access token,
//   2. POST a text-to-speech synthesize request using that token,
//   3. write whatever comes back to ./audio.mp3.
let requestPromise = require('request-promise-native');
let fs = require("fs")

// Form fields for the IAM token request; "<api_key>" is a placeholder.
let postData = {
    "grant_type":"urn:ibm:params:oauth:grant-type:apikey",
    "apikey":"<api_key>"
};

// Token request: `form` sends postData as a URL-encoded form body.
let opts = {
    uri : "https://iam.bluemix.net/identity/token",
    headers : {
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json"
    },
    method: "POST",
    form: postData
}

requestPromise(opts).then((body)=>{
    // The token response arrives as a JSON string; pull out the bearer token.
    let token = JSON.parse(body).access_token;

    // Synthesize parameters (this inner postData shadows the outer one).
    let postData = {
        "text": 'Hello world',
        "accept": 'audio/mp3',
        "voice": 'en-US_AllisonVoice'
    };

    let opts = {
        uri : "https://gateway-syd.watsonplatform.net/text-to-speech/api/v1/synthesize",
        headers : {
            "Content-Type": "application/json",
            // NOTE(review): this header asks for JSON while the request body
            // asks for 'audio/mp3' -- the two disagree.
            "Accept": "application/json",
            // "Accept": "audio/mp3",
            'Content-Length' : Buffer.byteLength(JSON.stringify(postData)),
            "Authorization": "Bearer "+token
        },
        method: "POST",
        // NOTE(review): per the `request` docs, `json` serializes the body and
        // also tries to parse the response as JSON, and without
        // `encoding: null` the response body is decoded into a string rather
        // than returned as a Buffer. That is presumably what mangles the
        // audio bytes -- confirm against the `request` documentation.
        json: postData
    }

    // This inner promise is not returned, so the outer .catch below does not
    // see its rejections; it has its own .catch instead.
    requestPromise(opts).then((body)=>{
        let chunkStream = fs.createWriteStream('./audio.mp3')
        // NOTE(review): treats the body as base64 text; presumably the service
        // sends raw audio bytes rather than base64 -- confirm.
        let buf = Buffer.from(body, 'base64')
        // The stream is written to but never end()ed in this snippet.
        chunkStream.write(buf)
    }).catch((err)=>{
        // Rethrowing here rejects the promise returned by .catch, and nothing
        // further down the chain handles that rejection.
        if (err) throw err;
    })
}).catch((err)=>{
    // Same pattern as above: the rethrown error has no downstream handler.
    if (err) throw err;
})

We don't know how to deal with that response. Saving it to an mp3 file as a base64-decoded buffer produces a corrupted audio file, and the same happens if we save the response directly to the file or change the Accept header to audio/mp3. We even tried running the audio files through mp3val, which fixes a lot of similar problems, but that didn't work either.


Solution

  • You can get your result using the official node api:

    npm install --save watson-developer-cloud
    

    and after that

    // Synthesize a short phrase through the Watson Node SDK and save the
    // returned audio to result-audio.wav.
    const TextToSpeechV1 = require('watson-developer-cloud/text-to-speech/v1');
    const fs = require('fs');

    // Only the service base URL (ending in /api/) is passed here; the
    // /v1/synthesize path is not part of it.
    const textToSpeech = new TextToSpeechV1({
      iam_apikey: 'API_KEY',
      url: 'https://gateway-syd.watsonplatform.net/text-to-speech/api/'
    });

    const synthesizeParams = {
      text: 'How are you doing?',
      accept: 'audio/wav',
      voice: 'en-US_AllisonVoice'
    };

    textToSpeech.synthesize(synthesizeParams, function (err, audio) {
      if (err) {
        // Print the error itself so the cause of the failure is not lost.
        console.error('failure', err);
        return;
      }

      // Write the audio payload handed to the callback straight to disk.
      fs.writeFileSync('result-audio.wav', audio);
      console.log('success');
    });
    

    Note that when you go through TextToSpeechV1 the URL is different from the raw HTTP call: pass only the base URL ending in /api/, since /v1/synthesize is called implicitly.