I'm using the code below to convert speech to text with the Azure Speech-to-Text service. I want to convert my audio files into text. Here is the code:
import os
import azure.cognitiveservices.speech as speechsdk
def recognize_from_microphone():
    """Run a single recognition pass over a fixed media file and print the outcome.

    Despite its name, this function reads from a file path rather than a
    microphone. It relies on ``my_key`` and ``my_region`` being defined
    elsewhere (they are not assigned inside this function).

    Exactly one of three outcomes is reported on stdout: the recognized
    text, the no-match details, or the cancellation reason (followed by
    the error details when the cancellation was caused by an error).
    """
    media_path = "C:\\Users\\DELL\\Desktop\\flowlly.com\\demo\\003. Class 3 - Monolith, Microservices, gRPC, Webhooks.mp4"

    speech_config = speechsdk.SpeechConfig(subscription=my_key, region=my_region)
    speech_config.speech_recognition_language = "en-US"
    audio_config = speechsdk.audio.AudioConfig(filename=media_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    # Block until the asynchronous one-shot recognition has produced a result.
    outcome = speech_recognizer.recognize_once_async().get()
    outcome_reason = outcome.reason

    if outcome_reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(outcome.text))
        return

    if outcome_reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(outcome.no_match_details))
        return

    if outcome_reason == speechsdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        # Only an error-type cancellation carries error details worth printing.
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
            print("Did you set the speech resource key and region values?")


recognize_from_microphone()
But I'm getting this error when trying to run the transcriber:
File "C:\Users\DELL\Desktop\flowlly.com\demo\transcriber.py", line 48, in <module>
recognize_from_microphone()
File "C:\Users\DELL\Desktop\flowlly.com\demo\transcriber.py", line 18, in recognize_from_microphone
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\speech.py", line 1006, in __init__
_call_hr_fn(
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 62, in _call_hr_fn
_raise_if_failed(hr)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 55, in _raise_if_failed
__try_get_error(_spx_handle(hr))
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 50, in __try_get_error
raise RuntimeError(message)
RuntimeError: Exception with error code:
[CALL STACK BEGIN]
> pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- recognizer_create_speech_recognizer_from_config
- recognizer_create_speech_recognizer_from_config
[CALL STACK END]
Exception with an error code: 0xa (SPXERR_INVALID_HEADER)
I have installed the SDK, but it's not working. What should I do now?
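The currently supported format in the Azure Speech-to-Text service is WAV (16 kHz or 8 kHz, 16-bit, mono PCM). Your code passes an `.mp4` file to `AudioConfig`, so the recognizer cannot parse the file header and fails with `SPXERR_INVALID_HEADER`. Convert the recording to a WAV file in that format first, then pass the converted file instead: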
For example, in the `AudioConfig` call use `filename="path/to/your/converted_file.wav"`. The complete example:
import os
import azure.cognitiveservices.speech as speechsdk
def recognize_from_audio_file(
    audio_file_path: str = r"C:\Users\samplest 3.wav",
    my_key: str = "YourSubscriptionKey",
    my_region: str = "YourRegion",
    language: str = "en-US",
) -> None:
    """Recognize speech from a WAV audio file and print the outcome.

    All parameters default to the placeholder values used in the original
    example, so calling the function without arguments behaves as before.

    Args:
        audio_file_path: Path to the audio file handed to ``AudioConfig``.
            The surrounding answer states the file should be WAV
            (16 kHz or 8 kHz, 16-bit, mono PCM).
        my_key: Speech resource subscription key. Replace the placeholder
            default with your actual key.
        my_region: Speech resource region. Replace the placeholder default
            with your actual region.
        language: Recognition language assigned to
            ``speech_recognition_language``.

    Exactly one of three outcomes is reported on stdout: the recognized
    text, the no-match details, or the cancellation reason (followed by
    the error details when the cancellation was caused by an error).
    """
    speech_config = speechsdk.SpeechConfig(subscription=my_key, region=my_region)
    speech_config.speech_recognition_language = language

    audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    # Block until the asynchronous one-shot recognition has produced a result.
    # NOTE(review): recognize_once is documented as single-utterance
    # recognition; confirm whether continuous recognition is needed for
    # long recordings.
    speech_recognition_result = speech_recognizer.recognize_once_async().get()
    result_reason = speech_recognition_result.reason

    if result_reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(speech_recognition_result.text))
    elif result_reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
    elif result_reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_recognition_result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        # Only an error-type cancellation carries error details worth printing.
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")


recognize_from_audio_file()
Output: