reactjsamazon-web-servicesgetusermediamediarecorderamazon-transcribe

Encode MediaRecorder stream in PCM format for AWS transcribe


I'm trying to use AWS Transcribe Medical, but I can't get it to work because it requires PCM-encoded audio data. I tried multiple custom functions, but had no luck. I want to use something like MediaRecorder's start method because I want to send the data in chunks in real time.

import React, { useState, useRef } from "react";
import {
  TranscribeStreamingClient,
  StartMedicalStreamTranscriptionCommand,
} from "@aws-sdk/client-transcribe-streaming";
import logo from "./logo.svg";
import "./App.css";

function App() {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorder = useRef(null);
  const audioChunks = useRef([]);
  const transcribeClient = useRef(null);
  const client = new TranscribeStreamingClient({
    region: "region",
    credentials: {
      accessKeyId: "accessKeyId",
      secretAccessKey: "secretAccessKey",
    },
  });

  const handleMicClick = () => {
    if (!isRecording) {
      startRecording();
    } else {
      stopRecording();
    }
  };

  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaRecorder.current = new MediaRecorder(stream);
      mediaRecorder.current.addEventListener(
        "dataavailable",
        handleDataAvailable
      );
      mediaRecorder.current.start(1000);
      setIsRecording(true);
    } catch (error) {
      console.error("Error starting recording: ", error);
    }
  };

  const stopRecording = () => {
    mediaRecorder.current.stop();
    setIsRecording(false);
    transcribeClient.current.destroy();
    transcribeClient.current = null;
  };

  const handleDataAvailable = (event) => {
    audioChunks.current.push(event.data);
    if (transcribeClient.current === null) {
      transcribeClient.current = client;
    }
    sendAudio(event.data);
  };

  const sendAudio = async (audioData) => {
    const input = {
      LanguageCode: "en-US",
      MediaSampleRateHertz: 44100,
      MediaEncoding: "pcm", // required
      Specialty: "PRIMARYCARE", // required
      Type: "CONVERSATION", // required
      AudioStream: audioData, // -> This needs to be in pcm format
    };

    const command = new StartMedicalStreamTranscriptionCommand(input);
    const response = await client.send(command);
    console.log("Response: ", response.TranscriptResultStream);

    for await (const event of response.TranscriptResultStream) {
      if (event.TranscriptEvent) {
        const message = event.TranscriptEvent;
        // Get multiple possible results
        const results = event.TranscriptEvent.Transcript.Results;
        // Print all the possible transcripts
        results.map((result) => {
          (result.Alternatives || []).map((alternative) => {
            const transcript = alternative.Items.map(
              (item) => item.Content
            ).join(" ");
            console.log("something is happening", transcript);
          });
        });
      }
    }
  };

  return (
    <div className="App">
      <header className="App-header">
        <img src={logo} className="App-logo" alt="logo" />
        <p>
          Edit <code>src/App.js</code> and save to reload.
        </p>
        <a
          className="App-link"
          href="https://reactjs.org"
          target="_blank"
          rel="noopener noreferrer"
        >
          Learn React
        </a>
        <button onClick={handleMicClick}>
          {isRecording ? "Stop" : "Record"}
        </button>
      </header>
    </div>
  );
}

export default App;

If there is any package which will do the job then please mention.


Solution

  • The solution is a bit tricky: we need to use the browserify package to bundle AWS Transcribe for the frontend.

    Your package.json will look like this

    "scripts": {
            "start": "react-scripts start",
            "build": "react-scripts build",
            "test": "react-scripts test",
            "eject": "react-scripts eject",
            "browserify": "browserify src/lib/main.js -o public/aws_transcribe.js -d"
    },
    "dependencies": {
      "@aws-sdk/eventstream-codec": "^3.329.0",
      "@aws-sdk/util-utf8": "^3.310.0",
      "microphone-stream": "^5.3.1",
      "query-string": "^6.12.1",
      "util": "^0.12.4"
    },
    "devDependencies": {
            "@babel/core": "^7.21.8",
            "@babel/preset-env": "^7.21.5",
            "@babel/preset-react": "^7.18.6",
            "babel-loader": "^9.1.2",
            "babelify": "^10.0.0",
            "browserify": "^17.0.0",
            "eslint": "^8.10.0",
            "eslint-config-airbnb": "^19.0.4",
            "eslint-config-prettier": "^8.5.0",
            "eslint-plugin-import": "^2.25.4",
            "eslint-plugin-jsx-a11y": "^6.5.1",
            "eslint-plugin-prettier": "^4.0.0",
            "eslint-plugin-react": "^7.29.3",
            "eslint-plugin-react-hooks": "^4.3.0",
            "file-loader": "^6.2.0",
            "prettier": "^2.7.1"
        },
        "browserify": {
            "transform": [
                [
                    "babelify",
                    {
                        "presets": [
                            "@babel/env"
                        ]
                    }
                ]
            ]
    }
    
    

    Your src/lib/main.js file will have https://gist.github.com/shubhamambastha/838e81dbefd9fe3a647f42eeaebc7ae1#file-main-js

    Then you need to run browserify script, this will create aws_transcribe.js file in public.

    If you're using React, add the browserified script to your public/index.html:

     <script src="%PUBLIC_URL%/aws_transcribe.js" crossorigin="anonymous"></script>
    

    Then you need to create awsTranscribe context to use across your app. Everything will be in above gist. Please check that.

    Example to use

    import React from 'react'
    import { TranscribeContext } from '../../../context/awsTranscribe'
    
    export const Example = () => {
        const { awsTranscribe } = React.useContext(TranscribeContext)
    
        React.useEffect(() => {
            const handleTranscriptError = (event) => {
                const errorMessage = event.detail.error
                dispatch(
                    setAlert({
                        alertType: 'error',
                        alertMsg: 'Error in AWS Medical Transcribe',
                        alertOpen: true,
                    })
                )
                awsTranscribe.stop()
                setRecordingIndex(false)
                console.log('Error in AWS Transcribe: ', errorMessage)
            }
    
            document.addEventListener('transcript-error', handleTranscriptError)
            return () => {
                document.removeEventListener('transcript-error', handleTranscriptError)
            }
        }, [])
    
        React.useEffect(() => {
            const handleTranscript = (event) => {
                const isFinal = event.detail.isFinal
                const transcript = event.detail.transcript
                const valuesRef = someValuesRef.current[recordingIndex]
                if (isFinal) {
                    const updatedText = valuesRef?.notes + transcript + '\n'
                    setFieldValueRef.current(`recommendation[${recordingIndex}].notes`, updatedText)
                    setPartialTranscript('')
                } else {
                    // need to add logic for partial transcript
                    const partialTranscript = transcript + '...' // Modify this based on your desired format
                    setPartialTranscript(partialTranscript)
                }
            }
    
            if (setFieldValueRef.current) {
                document.addEventListener('transcript', handleTranscript)
            }
            return () => {
                document.removeEventListener('transcript', handleTranscript)
            }
        }, [recordingIndex])
    
        const handleClose = (event, reason) => {
            if (reason && reason == 'backdropClick') return
            if (awsTranscribe.socket) {
                awsTranscribe.stop()
            }
            dispatch(toggleRecommendationsModal(false))
        }
    
        const handleRecording = (checker, index) => {
            if (checker === 'uncheck') {
                setRecordingIndex(index)
                awsTranscribe.start()
            } else {
                setRecordingIndex(false)
                awsTranscribe.stop()
            }
        }
    
        return (
            <div> </div>
        )
    }
    
    export default Example 
    

    I hope this helps.