Tags: javascript, audio, mp3, lame, azure-speech

Azure Speech javascript SDK: Output audio in mp3


I use the sdk.connection methods to capture audio from the speech to text recognizer. It creates PCM audio that I want to convert into MP3.

This is how the connection is initialised:

const con = SpeechSDK.Connection.fromRecognizer(this.recognizer);
// Collect every outbound audio message that actually carries binary data.
con.messageSent = args => {
  const { message } = args;
  const isOutboundAudio = message.path === "audio" && message.isBinaryMessage;
  if (!isOutboundAudio || message.binaryMessage === null) {
    return;
  }
  // Append the payload and advance the fragment counter.
  this.wavFragments[this.wavFragmentCount] = message.binaryMessage;
  this.wavFragmentCount++;
};

And this is how the WAV file is built:

// Total number of bytes across every captured fragment (header included).
let byteCount = 0;
for (let i = 0; i < this.wavFragmentCount; i++) {
  byteCount += this.wavFragments[i].byteLength;
}

// Output array: all fragments copied back to back.
const sentAudio = new Uint8Array(byteCount);
let writeOffset = 0;
for (let i = 0; i < this.wavFragmentCount; i++) {
  const fragment = new Uint8Array(this.wavFragments[i]);
  sentAudio.set(fragment, writeOffset);
  writeOffset += fragment.byteLength;
}

// Set the sizes in the wave header. The offsets used here (4 and 40) assume a
// canonical 44-byte RIFF/WAVE header at the start of the stream:
//   offset 4  = RIFF chunk size = total length minus the 8-byte "RIFF"+size prefix
//   offset 40 = data chunk size = total length minus the 44-byte header
// Writing the full byteCount into both fields (as before) declares more data
// than the buffer holds.
// NOTE: throws a RangeError if fewer than 44 bytes were captured.
const view = new DataView(sentAudio.buffer);
view.setUint32(4, byteCount - 8, true);
view.setUint32(40, byteCount - 44, true);

I tried using lamejs to convert 'sentAudio' into MP3.

import {lamejs} from "../../modules/lame.min.js";

// Wrap the assembled WAV bytes in a Blob so FileReader can return them as an ArrayBuffer.
const wavBlob = new Blob([sentAudio]);
const reader = new FileReader();
    // Runs once the Blob has been read; encodes the audio and offers it for download.
    reader.onload = evt => {
      const audioData = evt.target.result;
      // Parse the WAV header; sampleRate, dataOffset and dataLen are read from it below.
      const wav = lamejs.WavHeader.readHeader(new DataView(audioData));
      // Encoder arguments are 1, the header's sample rate and 128
      // (presumably channel count and bitrate in kbps — confirm against the lamejs docs).
      const mp3enc = new lamejs.Mp3Encoder(1, wav.sampleRate, 128);
      // NOTE(review): this views the data as 8-bit values and covers only
      // dataLen / 2 bytes. If the capture is 16-bit PCM (the /2 suggests so),
      // an Int16Array is presumably what the encoder expects — this is the
      // likely cause of the empty/inaudible output; confirm.
      const samples = new Int8Array(audioData, wav.dataOffset, wav.dataLen / 2);
      let mp3Tmp = mp3enc.encodeBuffer(samples); // encode mp3

      // Push encode buffer to mp3Data variable
      const mp3Data = [];
      mp3Data.push(mp3Tmp);

      // Get end part of mp3
      mp3Tmp = mp3enc.flush();

      // Write last data to the output data, too
      // mp3Data contains now the complete mp3Data
      mp3Data.push(mp3Tmp);

      // Bundle both encoder outputs into one Blob and pass it to the download helper.
      const blob = new Blob(mp3Data, { type: "audio/mp3" });
      this.createDownloadLink(blob, "mp3");
    };
    // Start the asynchronous read; the handler above fires when it completes.
    reader.readAsArrayBuffer(wavBlob);

The resulting MP3 Blob is either empty or contains inaudible sound. I have also tried the 'encodeMP3' method described in this example, but it gives the same output.

Is there an existing solution that supports this MP3 conversion?


Solution

  • Regarding the issue, please refer to the following code.

    // The captured stream is assumed to begin with a canonical 44-byte RIFF/WAVE
    // header; the header offsets patched below (4 and 40) only make sense for
    // that layout.
    const WAV_HEADER_BYTES = 44;

    // Total number of bytes across every captured fragment (header included).
    let byteCount = 0;
    for (let i = 0; i < wavFragmentCount; i++) {
      byteCount += wavFragments[i].byteLength;
    }
    if (byteCount < WAV_HEADER_BYTES) {
      // Without this guard the DataView writes below fail with an opaque RangeError.
      throw new Error('No audio captured: the recording is shorter than a WAV header.');
    }

    // Output array: all fragments copied back to back.
    const sentAudio = new Uint8Array(byteCount);
    let writeOffset = 0;
    for (let i = 0; i < wavFragmentCount; i++) {
      const fragment = new Uint8Array(wavFragments[i]);
      sentAudio.set(fragment, writeOffset);
      writeOffset += fragment.byteLength;
    }

    // Patch the size fields of the WAV header:
    //   offset 4  = RIFF chunk size = total length minus the 8-byte "RIFF"+size prefix
    //   offset 40 = data chunk size = total length minus the header
    const view = new DataView(sentAudio.buffer);
    view.setUint32(4, byteCount - 8, true);
    view.setUint32(40, byteCount - WAV_HEADER_BYTES, true);

    // Parse the header straight from the assembled bytes. A Blob -> data URL ->
    // atob round trip would only reproduce the same bytes, so it is skipped.
    const wavHdr = lamejs.WavHeader.readHeader(view);
    if (!wavHdr) {
      throw new Error('Captured audio does not start with a valid WAV header.');
    }
    console.log(wavHdr);

    // View only the PCM payload as 16-bit samples. Starting at dataOffset keeps
    // the header bytes from being encoded as audio, and clamping to the bytes
    // actually present keeps the view inside the buffer.
    const availableBytes = Math.max(0, byteCount - wavHdr.dataOffset);
    const dataBytes = Math.min(wavHdr.dataLen, availableBytes);
    const wavSamples = new Int16Array(
      sentAudio.buffer,
      wavHdr.dataOffset,
      Math.floor(dataBytes / 2)
    );

    // convert to mp3 with lamejs
    const mp3 = this.wavToMp3(wavHdr.channels, wavHdr.sampleRate, wavSamples);

    // Read the MP3 back as a base64 data URL. A dedicated reader is used, and the
    // handler is attached before the read starts.
    const mp3Reader = new FileReader();
    mp3Reader.onload = () => {
      const base64String = mp3Reader.result;
      console.log(base64String);
    };
    mp3Reader.readAsDataURL(mp3);
    /**
     * Encodes 16-bit PCM samples to MP3 with lamejs at 128 kbps.
     *
     * Every sample is encoded, including a final chunk shorter than one
     * 1152-sample frame. For two channels the input is split into separate
     * left/right buffers before encoding.
     *
     * @param channels   Channel count handed to the encoder (2 triggers de-interleaving).
     * @param sampleRate Sample rate in Hz, handed to the encoder unchanged.
     * @param samples    PCM samples; for 2 channels they are assumed to be
     *                   interleaved L/R as in WAV data.
     * @returns A Blob of type 'audio/mp3' containing all encoded chunks followed
     *          by the encoder's flushed remainder.
     */
    function wavToMp3(channels: number, sampleRate: number, samples: Int16Array): Blob {
      console.log(channels);
      console.log(sampleRate);

      const frameSize = 1152;
      const mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128);
      const chunks = [];

      // Mono (or any non-stereo count) is passed through untouched; stereo is
      // split because the encoder takes one buffer per channel.
      let left = samples;
      let right: Int16Array | undefined;
      if (channels === 2) {
        const frames = Math.floor(samples.length / 2);
        left = new Int16Array(frames);
        right = new Int16Array(frames);
        for (let f = 0; f < frames; f++) {
          left[f] = samples[2 * f];
          right[f] = samples[2 * f + 1];
        }
      }

      // Step by whole frames; subarray clamps at the end, so the last
      // (possibly short) chunk is encoded instead of being dropped.
      for (let start = 0; start < left.length; start += frameSize) {
        const end = start + frameSize;
        const mp3buf = right
          ? mp3enc.encodeBuffer(left.subarray(start, end), right.subarray(start, end))
          : mp3enc.encodeBuffer(left.subarray(start, end));
        if (mp3buf.length > 0) {
          // Copy the encoder output before it is stored.
          chunks.push(new Int8Array(mp3buf));
        }
      }

      // Drain whatever the encoder is still holding.
      const tail = mp3enc.flush();
      if (tail.length > 0) {
        chunks.push(new Int8Array(tail));
      }

      console.log('done encoding, size=', chunks.length);
      // No object URL is created here: one made only for logging would never be
      // revoked and would keep the Blob alive.
      return new Blob(chunks, { type: 'audio/mp3' });
    }