Tags: java, azure, registration, speech-to-text, http-status-code-400

Azure STT is giving error HttpResponseProxy{HTTP/1.1 400 Bad Request


Azure STT is giving the following error: HttpResponseProxy{HTTP/1.1 400 Bad Request [Content-Type: text/plain; charset=utf-8, Date: Fri, 02 Feb 2024 14:33:46 GMT, Server: Kestrel, Transfer-Encoding: chunked, Strict-Transport-Security: max-age=31536000; includeSubDomains] ResponseEntityProxy{[Content-Type: text/plain; charset=utf-8,Chunked: true]}}.

    HttpClient httpclient = HttpClients.createDefault();
    URIBuilder builder = new URIBuilder(env.getProperty("voice.text.api"));
    BASE64DecodedMultipartFile audio = new BASE64DecodedMultipartFile(Base64.getDecoder().decode(audioReq));

    // Add the recognition language as the "language" query parameter and build the final URI.
    builder.setParameter("language", sourceLang);
    URI uri = builder.build();
    HttpPost request = new HttpPost(uri);
    // Content-Type header comes from the AUDIO_WAV_TYPE constant (its value is not shown here);
    // the subscription key is read from the "Voice.SubKey1" property.
    request.setHeader(CONTENT_TYPE, AUDIO_WAV_TYPE);
    request.setHeader("Ocp-Apim-Subscription-Key", env.getProperty("Voice.SubKey1"));
    //request.setHeader("Accept","application/json");
    // Request body is the file returned by convert(audio), declared as application/octet-stream.
    // NOTE(review): this differs from the AUDIO_WAV_TYPE header set above — confirm which content
    // type the service should receive and that convert(audio) yields a valid WAV file.
    request.setEntity(new FileEntity(convert(audio), ContentType.APPLICATION_OCTET_STREAM));
    HttpResponse response = null;
    try {
        response = httpclient.execute(request);
    } catch (IOException e) {
        // I/O failures are rethrown unchecked with the original exception as the cause.
        throw new RuntimeException(e);
    }

Solution

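  • `HttpResponseProxy` is only the name of the Apache HttpClient class that wraps the response; the 400 Bad Request itself comes from the Azure Speech-to-Text service and means the request was misconfigured or carried invalid data.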

    To fix the error, ensure that the correct endpoint URI for the Azure Speech-to-Text service is used, along with a valid speech key and properly formatted audio data in a .wav file.

    Below is the correct endpoint URI:

    https://<speech_region>.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US
    

    The following code uses the endpoint URI above together with a speech key and converts speech to text without errors. Replace the `<speech_region>` and `<speech_key>` placeholders, and the audio file path, with your own values.

    Code:

    import org.apache.http.HttpEntity;
    import org.apache.http.HttpResponse;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.client.utils.URIBuilder;
    import org.apache.http.entity.ContentType;
    import org.apache.http.entity.FileEntity;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import java.io.File;
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.Scanner;
    
    /**
     * Sends a WAV file to the Azure Speech-to-Text REST endpoint and prints the service's reply.
     *
     * <p>Replace the {@code <speech_region>} and {@code <speech_key>} placeholders and the audio
     * file path with real values before running.
     */
    public class SpeechToTextExample {

        /** Recognition endpoint without a query string; the language is added as a parameter. */
        private static final String ENDPOINT =
                "https://<speech_region>.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1";

        /** Media type used for both the Content-Type header and the request entity. */
        private static final String AUDIO_WAV = "audio/wav";

        /**
         * Posts the audio file and writes the response body to standard output.
         *
         * <p>Problems (bad URI, unreadable file, non-2xx status, I/O failure) are reported on
         * standard error; the response body is still printed for non-2xx replies because it
         * carries the service's explanation of the failure.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String filePath = "path/to/.wav file";
            String sourceLang = "en-US";

            URI uri;
            try {
                uri = new URIBuilder(ENDPOINT).setParameter("language", sourceLang).build();
            } catch (URISyntaxException e) {
                System.err.println("Invalid endpoint URI: " + e.getMessage());
                return;
            }

            // Fail fast with a clear message rather than an I/O error from inside the HTTP layer.
            if (!Files.isReadable(Paths.get(filePath))) {
                System.err.println("Audio file is missing or not readable: " + filePath);
                return;
            }

            HttpPost request = new HttpPost(uri);
            request.setHeader("Content-Type", AUDIO_WAV);
            request.setHeader("Ocp-Apim-Subscription-Key", "<speech_key>");
            request.setEntity(new FileEntity(new File(filePath), ContentType.create(AUDIO_WAV)));

            // try-with-resources releases the connection and the client even when reading fails.
            try (CloseableHttpClient httpclient = HttpClients.createDefault();
                 CloseableHttpResponse response = httpclient.execute(request)) {
                int status = response.getStatusLine().getStatusCode();
                if (status < 200 || status >= 300) {
                    System.err.println("Speech service returned " + response.getStatusLine());
                }

                HttpEntity entity = response.getEntity();
                if (entity != null) {
                    // Decode explicitly as UTF-8 instead of relying on the platform default charset.
                    try (Scanner scanner =
                            new Scanner(entity.getContent(), StandardCharsets.UTF_8.name())) {
                        while (scanner.hasNextLine()) {
                            System.out.println(scanner.nextLine());
                        }
                    }
                }
            } catch (IOException e) {
                System.err.println("Speech-to-text request failed: " + e);
            }
        }
    }
    

    Output :

    It ran successfully, and the speech was converted to text as shown below.

    {"RecognitionStatus":"Success","Offset":1100000,"Duration":72600000,"DisplayText":"Hello, this is a test of the speech synthesis service."}
    

    enter image description here