Tags: javascript, reactjs, speech-recognition, speech-to-text, azure-speech

Unable to recognize speech from microphone using Azure Speech to Text service in react js


I wrote code in React to recognize text from microphone speech, but it is unable to identify or recognize the speech. No error is shown, and I have tried a couple of options, but it is still not working.

const startSpeechRecognition = async () => {
  // Prompt for (and confirm) microphone permission before starting the SDK.
  // NOTE(review): the stream itself is not wired into the recognizer — the
  // SDK opens its own default-microphone input; this call only surfaces the
  // permission dialog early.
  await navigator.mediaDevices.getUserMedia({ audio: true });

  // Attach the handler BEFORE starting recognition. In the original code the
  // handler was assigned after startContinuousRecognitionAsync(), so results
  // emitted before the assignment were silently dropped.
  speechRecognizer.recognized = (s, e) => {
    if (e.result.reason === ResultReason.RecognizedSpeech) {
      const recognizedText = e.result.text;
      console.log('Ask: ' + recognizedText);
    }
  };

  speechRecognizer.startContinuousRecognitionAsync(
    undefined,
    // Without an error callback, start-up failures (bad key/region, blocked
    // mic) are invisible — exactly the "no error, nothing recognized" symptom.
    (err) => console.error('Failed to start recognition:', err),
  );
};
  return (
    <div>
      <button
        onClick={() => {
          startSpeechRecognition();
        }}
      >
        Start
      </button>
    </div>
  );


Here I tried listening to the microphone only once, but it is still not able to recognize the speech:


const reco = new SpeechRecognizer(speechConfig);
    const conn = Connection.fromRecognizer(reco);
    conn.setMessageProperty('speech.context', 'phraseDetection', {
      INTERACTIVE: {
        segmentation: {
          mode: 'custom',
          segmentationSilenceTimeoutMs: 5000,
        },
      },
      mode: 'Interactive',
    });

    reco.recognizeOnceAsync(
      (result) => {
        console.log('Recognition done!!!');
        // do something with the recognition
      },
      (error) => {
        console.log('Recognition failed. Error:' + error);
      },
    );
  };
  return (
    <div>
      <button
        onClick={() => {
          startSpeechRecognition();
        }}
      >
        Start
      </button>
    </div>
)

Solution

  • I tried the code below, which recognizes the speech and shows the text accordingly. I created an Azure Speech Service in the Azure Portal and connected it in my code.

    Below is the code I tried with:

    function SpeechRecognitionComponent() {
      const [recognizedText, setRecognizedText] = useState('');
      const [isListening, setIsListening] = useState(false);
    
      const startSpeechRecognition = async () => {
        const subscriptionKey = '*****';
        const serviceRegion = '*****';
    
        const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
        const audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
    
        const recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
    
        recognizer.recognizeOnceAsync((result) => {
          if (result.reason === SpeechSDK.ResultReason.RecognizedSpeech) {
            const recognized = result.text;
            setRecognizedText(recognized);
          } else {
            console.error('Recognition failed. Error:', result.errorDetails);
          }
          setIsListening(false);
        });
    
        setIsListening(true);
      };
    
      const stopSpeechRecognition = () => {
        setIsListening(false);
      };
    
      return (
        <div>
          <h1>Speech Recognition Example</h1>
          <button onClick={isListening ? stopSpeechRecognition : startSpeechRecognition}>
            {isListening ? 'Stop' : 'Start'} Listening
          </button>
          {isListening && <p>Listening...</p>}
          <div>
            <strong>Recognized Text: </strong>
            {recognizedText}
          </div>
        </div>
      );
    }
    

    Output: (screenshot of the running app showing the recognized text — image not reproduced here)