Azure Speech SDK has a limitation where it only supports detecting up to 4 languages simultaneously in the "DetectAudioAtStart" mode. To work around this limitation, I create batches of 4 languages from the languages_to_detect list and attempt to detect the language for each batch.But it is not able to recogize and giving me wrong answer. I am passing a bangali audio file and it is saying hindi. which is wrong. Below is the code for your reference:
import azure.cognitiveservices.speech as speechsdk
subscription_key = "00000000000000000000000000"
service_region = "westus"
audio_file_path = "C:\\yogesh_folder\\speech_bangla.wav"
# List of all languages to detect
languages_to_detect = ["en-US", "ml-IN", "ta-IN", "te-IN", "gu-IN", "kn-IN", "mr-IN", "pa-IN", "bn-IN", "hi-IN"]
# Configure speech recognition
speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
# Audio configuration
audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
# Initialize detected language
detected_language = None
# Iterate through batches of 4 languages
for i in range(0, len(languages_to_detect), 4):
# Slice the batch of languages
batch_languages = languages_to_detect[i:i+4]
# Configure auto-detection of source language for current batch
auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
languages=batch_languages
)
# Create a speech recognizer instance for current batch
speech_recognizer = speechsdk.SpeechRecognizer(
speech_config=speech_config,
auto_detect_source_language_config=auto_detect_source_language_config,
audio_config=audio_config
)
# Perform recognition
print(f"Detecting speech in languages: {batch_languages}")
result = speech_recognizer.recognize_once()
# Check result
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
detected_language = result.properties.get(speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult)
print(f"Detected language: {detected_language}")
break # Exit loop if language is detected
# If no language is detected, provide feedback
if detected_language is None:
print("No language detected.")
I am passing a bangali audio file and it is saying hindi. which is wrong. Below is the code for your reference:
App.py:
# Log recognized text
recognized_text = result.text
print(f"Recognized text: {recognized_text}")
break # Exit loop if language is detected
elif result.reason == speechsdk.ResultReason.NoMatch:
print("No speech could be recognized")
elif result.reason == speechsdk.ResultReason.Canceled:
cancellation_details = result.cancellation_details
print(f"Speech Recognition canceled: {cancellation_details.reason}")
if cancellation_details.reason == speechsdk.CancellationReason.Error:
print(f"Error details: {cancellation_details.error_details}")
# If no language is detected, provide feedback
if detected_language is None:
print("No language detected.")
Result: