Tags: node.js, google-cloud-platform, credentials, google-speech-api, google-cloud-speech

How to implement speech recognition with the Google Cloud Speech-to-Text API using Node.js?


I am new to Google Cloud and recently created a project with the Speech-to-Text API enabled. After creating the project and linking it to a billing account, I opened the Node.js in-console tutorial for a quick start.

After following all the steps in the in-console tutorial (and fixing a few mistakes in the tutorial's own code) and running it, I got the following error:

node:internal/process/promises:289
            triggerUncaughtException(err, true /* fromPromise */);
            ^

Error: 3 INVALID_ARGUMENT: Invalid resource field value in the request.
    at callErrorFromStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/call.js:31:19)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:192:76)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:323:181)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/resolving-call.js:99:78
    at process.processTicksAndRejections (node:internal/process/task_queues:77:11)
for call at
    at ServiceClientImpl.makeUnaryRequest (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:160:32)
    at ServiceClientImpl.<anonymous> (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@google-cloud/speech/build/src/v2/speech_client.js:318:29
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/timeout.js:44:16
    at repeat (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:80:25)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:119:13
    at OngoingCallPromise.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/call.js:67:27)
    at NormalApiCaller.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/normalApiCaller.js:34:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/createApiCall.js:108:30
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5) {
  code: 3,
  details: 'Invalid resource field value in the request.',
  metadata: Metadata {
    internalRepr: Map(2) {
      'google.rpc.errorinfo-bin' => [
        Buffer(127) [Uint8Array] [
           10,  24,  82,  69,  83,  79,  85,  82,  67,  69,  95,  80,
           82,  79,  74,  69,  67,  84,  95,  73,  78,  86,  65,  76,
           73,  68,  18,  14, 103, 111, 111, 103, 108, 101,  97, 112,
          105, 115,  46,  99, 111, 109,  26,  32,  10,   7, 115, 101,
          114, 118, 105,  99, 101,  18,  21, 115, 112, 101, 101,  99,
          104,  46, 103, 111, 111, 103, 108, 101,  97, 112, 105, 115,
           46,  99, 111, 109,  26,  49,  10,   6, 109, 101, 116, 104,
          111, 100,  18,  39, 103, 111, 111, 103, 108, 101,  46,  99,
          108, 111, 117, 100,
          ... 27 more items
        ]
      ],
      'grpc-status-details-bin' => [
        Buffer(222) [Uint8Array] [
            8,   3,  18,  44,  73, 110, 118,  97, 108, 105, 100,  32,
          114, 101, 115, 111, 117, 114,  99, 101,  32, 102, 105, 101,
          108, 100,  32, 118,  97, 108, 117, 101,  32, 105, 110,  32,
          116, 104, 101,  32, 114, 101, 113, 117, 101, 115, 116,  46,
           26, 171,   1,  10,  40, 116, 121, 112, 101,  46, 103, 111,
          111, 103, 108, 101,  97, 112, 105, 115,  46,  99, 111, 109,
           47, 103, 111, 111, 103, 108, 101,  46, 114, 112,  99,  46,
           69, 114, 114, 111, 114,  73, 110, 102, 111,  18, 127,  10,
           24,  82,  69,  83,
          ... 122 more items
        ]
      ]
    },
    options: {}
  },
  note: 'Exception occurred in retry method that was not classified as transient',
  statusDetails: [
    ErrorInfo {
      metadata: {
        service: 'speech.googleapis.com',
        method: 'google.cloud.speech.v2.Speech.Recognize'
      },
      reason: 'RESOURCE_PROJECT_INVALID',
      domain: 'googleapis.com'
    }
  ],
  reason: 'RESOURCE_PROJECT_INVALID',
  domain: 'googleapis.com',
  errorInfoMetadata: {
    service: 'speech.googleapis.com',
    method: 'google.cloud.speech.v2.Speech.Recognize'
  }
}

This was after I set a project for quota purposes (by running gcloud auth application-default set-quota-project $PROJECT_ID in the terminal).

I also went through the same steps with the gcloud CLI (on Windows) and got the same error.
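(Side note, in case it is relevant: as far as I understand, the Node.js client can also be given a project explicitly when it is constructed, although I am not sure whether that changes the quota project used for the request. This is just an idea, not something from the tutorial:)

const client = new speech.SpeechClient({ projectId: '<ProjectId>' }); // pass the project explicitly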

Is there something I forgot to do, or is something wrong in the code? Could it be related to the fact that I'm working with version 2 of the API while the code is written to work only with version 1?
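(For context, my rough understanding of the difference, which may well be wrong: the v1 samples pass the config and audio directly in the recognize request, with no recognizer resource, while v2 routes everything through a recognizer. A minimal v1-style sketch of what I mean, using the same sample file; this is not code from the tutorial:)

const speechV1 = require('@google-cloud/speech').v1;
const v1Client = new speechV1.SpeechClient();

async function transcribeWithV1() {
  // v1: config and audio go straight into the request, no recognizer involved.
  const [response] = await v1Client.recognize({
    config: {
      encoding: 'LINEAR16',
      sampleRateHertz: 16000,
      languageCode: 'en-US',
    },
    audio: { uri: 'gs://cloud-samples-data/speech/brooklyn_bridge.raw' },
  });
  for (const result of response.results) {
    console.log(`Transcript: ${result.alternatives[0].transcript}`);
  }
}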

Many thanks in advance!

The relevant Node.js code (I made some changes to the tutorial's original code after running into errors, and I also changed how the audio file's data is fetched, because I couldn't get it from the bucket URI the way it is usually done in GCP):

const speech = require('@google-cloud/speech').v2;
const https = require('https');

const client = new speech.SpeechClient();
const projectId = '<ProjectId>';
var recognizerName;

const fileUrl = 'https://storage.googleapis.com/cloud-samples-data/speech/brooklyn_bridge.wav';

async function createRecognizer() {
  const recognizerRequest = {
    parent: `projects/${projectId}/locations/global`,
    recognizerId: 'en123',
    recognizer: {
      languageCodes: ['en-US'],
      model: 'latest_long',
    },
  };

  const operation = await client.createRecognizer(recognizerRequest);
  const recognizer = operation[0].result;
  recognizerName = recognizer.name;
  console.log(`Created new recognizer: ${recognizerName}`);
}

async function getUrlData(url) {
  // Download the file over HTTPS and return its contents base64-encoded.
  return new Promise((resolve) => {
    https.get(url, response => {
      let body = '';
      response.on('data', chunk => {
        body += chunk;
      });
      response.on('end', () => {
        resolve(Buffer.from(body).toString('base64'));
      });
    });
  });
}

async function transcribeFile() {
  const content = await getUrlData(fileUrl);
  const transcriptionRequest = {
    recognizer: recognizerName,
    config: {
      autoDecodingConfig: {},
    },
    content: content,
  };

  const response = await client.recognize(transcriptionRequest); // <=== The error occurs on this line
  for (const result of response[0].results) {
    console.log(`Transcript: ${result.alternatives[0].transcript}`);
  }
}

createRecognizer();
transcribeFile();

Update:

After looking at the asker's code in this question, I realized that I had gotten confused when defining recognizerName and used only the recognizerId string instead of the recognizer's full resource path (projects/${projectId}/locations/global/recognizers/${recognizerId}). My code currently looks like this:

const projectId = '<ProjectId>';
const recognizerId = 'en123';
const fileUrl = 'https://storage.googleapis.com/cloud-samples-data/speech/brooklyn_bridge.wav';

// ...

async function transcribeFile() {
  const recognizerName = `projects/${projectId}/locations/global/recognizers/${recognizerId}`;
  const content = await getUrlData(fileUrl);
  const transcriptionRequest = {
    recognizer: recognizerName,
    config: {
      autoDecodingConfig: {},
    },
    content: content,
  };

  const response = await client.recognize(transcriptionRequest); // <=== The error occurs on this line
  for (const result of response[0].results) {
    console.log(`Transcript: ${result.alternatives[0].transcript}`);
  }
}

createRecognizer();
transcribeFile();

But now I get another error:

Error: 3 INVALID_ARGUMENT: Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters.
    at callErrorFromStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/call.js:31:19)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:192:76)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:323:181)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/resolving-call.js:99:78
    at process.processTicksAndRejections (node:internal/process/task_queues:77:11)
for call at
    at ServiceClientImpl.makeUnaryRequest (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:160:32)
    at ServiceClientImpl.<anonymous> (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@google-cloud/speech/build/src/v2/speech_client.js:318:29
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/timeout.js:44:16
    at repeat (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:80:25)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:119:13
    at OngoingCallPromise.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/call.js:67:27)
    at NormalApiCaller.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/normalApiCaller.js:34:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/createApiCall.js:108:30
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5) {
  code: 3,
  details: 'Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters.',
  metadata: Metadata { internalRepr: Map(0) {}, options: {} },
  note: 'Exception occurred in retry method that was not classified as transient'
}

Solution

  • I managed to solve it!

    The error Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters occurs when the requested file is in a headerless format without codec information (in this case .raw), so the Speech-to-Text API does not know how to decode it.

    To solve this, I had to pass an explicitDecodingConfig object either when creating the recognizer (inside the defaultRecognitionConfig property) or later in the transcriptionRequest (inside the config property).

    const speech = require('@google-cloud/speech').v2;
    
    const projectId = '<ProjectId>';
    const recognizerId = 'en123';
    
    const client = new speech.SpeechClient();
    
    async function createRecognizer() {
      const recognizerRequest = {
        parent: `projects/${projectId}/locations/global`,
        recognizerId,
        recognizer: {
          defaultRecognitionConfig: {
            explicitDecodingConfig: {
              encoding: 'LINEAR16',
              sampleRateHertz: 16000,
              audioChannelCount: 1
            }
          },
          languageCodes: ['en-US'],
          model: 'latest_long',
        },
      };
    
      const operation = await client.createRecognizer(recognizerRequest);
      const recognizer = operation[0].result;
      const recognizerName = recognizer.name;
      console.log(`Created new recognizer: ${recognizerName}`);
    }
    
    async function transcribeFile() {
      const recognizerName = `projects/${projectId}/locations/global/recognizers/${recognizerId}`;
      const gcsUri = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw';
    
      const transcriptionRequest = {
        recognizer: recognizerName,
        uri: gcsUri,
        config: {
          explicitDecodingConfig: {   // If you didn't set explicitDecodingConfig when creating the recognizer, you have to set it here.
            encoding: 'LINEAR16',
            sampleRateHertz: 16000,
            audioChannelCount: 1
          }
        },
      }
    
      const response = await client.recognize(transcriptionRequest);
      for (const result of response[0].results) {
        console.log(`Transcript: ${result.alternatives[0].transcript}`);
      }
    }
    
    (async function main() {
      await createRecognizer();
      await transcribeFile();
    })();
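
    For completeness: if you prefer to keep sending the audio inline (as in my original attempt) instead of pointing at a GCS URI, the v2 recognize request also accepts a content field with the raw bytes. A minimal sketch of that variant, reusing the client, projectId, and recognizerId declared above and assuming a local copy of the .raw file (the local path below is just a placeholder):

    const fs = require('fs');

    async function transcribeLocalFile(path) {
      const recognizerName = `projects/${projectId}/locations/global/recognizers/${recognizerId}`;

      const transcriptionRequest = {
        recognizer: recognizerName,
        // Raw PCM bytes read from disk; the client accepts a Buffer for this bytes field.
        content: fs.readFileSync(path),
        config: {
          explicitDecodingConfig: {
            encoding: 'LINEAR16',
            sampleRateHertz: 16000,
            audioChannelCount: 1
          }
        },
      };

      const response = await client.recognize(transcriptionRequest);
      for (const result of response[0].results) {
        console.log(`Transcript: ${result.alternatives[0].transcript}`);
      }
    }

    transcribeLocalFile('./brooklyn_bridge.raw'); // hypothetical local path

    It is essentially the same request as above, just with content instead of uri.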