I'm trying to use AWS Transcribe Medical, but I can't get it working because AWS Transcribe Medical requires "pcm"-encoded audio data. I tried multiple custom functions but had no luck. I want to use something like the MediaRecorder start method because I want to send the data in chunks in real time.
import React, { useState, useRef } from "react";
import {
TranscribeStreamingClient,
StartMedicalStreamTranscriptionCommand,
} from "@aws-sdk/client-transcribe-streaming";
import logo from "./logo.svg";
import "./App.css";
function App() {
const [isRecording, setIsRecording] = useState(false);
const mediaRecorder = useRef(null);
const audioChunks = useRef([]);
const transcribeClient = useRef(null);
const client = new TranscribeStreamingClient({
region: "region",
credentials: {
accessKeyId: "accessKeyId",
secretAccessKey: "secretAccessKey",
},
});
const handleMicClick = () => {
if (!isRecording) {
startRecording();
} else {
stopRecording();
}
};
const startRecording = async () => {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder.current = new MediaRecorder(stream);
mediaRecorder.current.addEventListener(
"dataavailable",
handleDataAvailable
);
mediaRecorder.current.start(1000);
setIsRecording(true);
} catch (error) {
console.error("Error starting recording: ", error);
}
};
const stopRecording = () => {
mediaRecorder.current.stop();
setIsRecording(false);
transcribeClient.current.destroy();
transcribeClient.current = null;
};
const handleDataAvailable = (event) => {
audioChunks.current.push(event.data);
if (transcribeClient.current === null) {
transcribeClient.current = client;
}
sendAudio(event.data);
};
const sendAudio = async (audioData) => {
const input = {
LanguageCode: "en-US",
MediaSampleRateHertz: 44100,
MediaEncoding: "pcm", // required
Specialty: "PRIMARYCARE", // required
Type: "CONVERSATION", // required
AudioStream: audioData, // -> This needs to be in pcm format
};
const command = new StartMedicalStreamTranscriptionCommand(input);
const response = await client.send(command);
console.log("Response: ", response.TranscriptResultStream);
for await (const event of response.TranscriptResultStream) {
if (event.TranscriptEvent) {
const message = event.TranscriptEvent;
// Get multiple possible results
const results = event.TranscriptEvent.Transcript.Results;
// Print all the possible transcripts
results.map((result) => {
(result.Alternatives || []).map((alternative) => {
const transcript = alternative.Items.map(
(item) => item.Content
).join(" ");
console.log("something is happening", transcript);
});
});
}
}
};
return (
<div className="App">
<header className="App-header">
<img src={logo} className="App-logo" alt="logo" />
<p>
Edit <code>src/App.js</code> and save to reload.
</p>
<a
className="App-link"
href="https://reactjs.org"
target="_blank"
rel="noopener noreferrer"
>
Learn React
</a>
<button onClick={handleMicClick}>
{isRecording ? "Stop" : "Record"}
</button>
</header>
</div>
);
}
export default App;
If there is any package that will do the job, please mention it.
The solution is a bit tricky: we need to use the browserify package to bundle AWS Transcribe for the frontend.
Your package.json will look like this:
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject",
"browserify": "browserify src/lib/main.js -o public/aws_transcribe.js -d"
},
"dependencies": {
"@aws-sdk/eventstream-codec": "^3.329.0",
"@aws-sdk/util-utf8": "^3.310.0",
"microphone-stream": "^5.3.1",
"query-string": "^6.12.1",
"util": "^0.12.4"
},
"devDependencies": {
"@babel/core": "^7.21.8",
"@babel/preset-env": "^7.21.5",
"@babel/preset-react": "^7.18.6",
"babel-loader": "^9.1.2",
"babelify": "^10.0.0",
"browserify": "^17.0.0",
"eslint": "^8.10.0",
"eslint-config-airbnb": "^19.0.4",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-import": "^2.25.4",
"eslint-plugin-jsx-a11y": "^6.5.1",
"eslint-plugin-prettier": "^4.0.0",
"eslint-plugin-react": "^7.29.3",
"eslint-plugin-react-hooks": "^4.3.0",
"file-loader": "^6.2.0",
"prettier": "^2.7.1"
},
"browserify": {
"transform": [
[
"babelify",
{
"presets": [
"@babel/env"
]
}
]
]
}
Your src/lib/main.js file will contain the code from this gist:
https://gist.github.com/shubhamambastha/838e81dbefd9fe3a647f42eeaebc7ae1#file-main-js
Then you need to run the browserify script; this will create the aws_transcribe.js file in the public folder.
If you're using React, add the browserified script to your public/index.html:
<script src="%PUBLIC_URL%/aws_transcribe.js" crossorigin="anonymous"></script>
Then you need to create an awsTranscribe context to use across your app.
Everything is in the gist linked above; please check it.
Example usage:
import React from 'react'
import { TranscribeContext } from '../../../context/awsTranscribe'
export const Example = () => {
const { awsTranscribe } = React.useContext(TranscribeContext)
React.useEffect(() => {
const handleTranscriptError = (event) => {
const errorMessage = event.detail.error
dispatch(
setAlert({
alertType: 'error',
alertMsg: 'Error in AWS Medical Transcribe',
alertOpen: true,
})
)
awsTranscribe.stop()
setRecordingIndex(false)
console.log('Error in AWS Transcribe: ', errorMessage)
}
document.addEventListener('transcript-error', handleTranscriptError)
return () => {
document.removeEventListener('transcript-error', handleTranscriptError)
}
}, [])
React.useEffect(() => {
const handleTranscript = (event) => {
const isFinal = event.detail.isFinal
const transcript = event.detail.transcript
const valuesRef = someValuesRef.current[recordingIndex]
if (isFinal) {
const updatedText = valuesRef?.notes + transcript + '\n'
setFieldValueRef.current(`recommendation[${recordingIndex}].notes`, updatedText)
setPartialTranscript('')
} else {
// need to add logic for partial transcript
const partialTranscript = transcript + '...' // Modify this based on your desired format
setPartialTranscript(partialTranscript)
}
}
if (setFieldValueRef.current) {
document.addEventListener('transcript', handleTranscript)
}
return () => {
document.removeEventListener('transcript', handleTranscript)
}
}, [recordingIndex])
const handleClose = (event, reason) => {
if (reason && reason == 'backdropClick') return
if (awsTranscribe.socket) {
awsTranscribe.stop()
}
dispatch(toggleRecommendationsModal(false))
}
const handleRecording = (checker, index) => {
if (checker === 'uncheck') {
setRecordingIndex(index)
awsTranscribe.start()
} else {
setRecordingIndex(false)
awsTranscribe.stop()
}
}
return (
<div> </div>
)
}
export default Example
I hope this helps.