web-audio-api

How can I get a continuous stream of samples from the JavaScript Web Audio API?


I'd like to get a continuous stream of samples in JavaScript from the audio API. The only way I've found to get samples is through the MediaRecorder object in the JavaScript Audio API.

I set up my recorder like this:

// Recorder configuration.
// NOTE(review): the MIME parameter is spelled `codec` here; confirm the browser
// honours it, since the data delivered below still has to be decoded.
const options = { mimeType: "audio/webm;codec=raw" };
this.mediaRecorder = new MediaRecorder(stream, options);
// Hand every recorded chunk to decodeChunk. The arrow function keeps `this`
// pointing at the enclosing object.
this.mediaRecorder.ondataavailable = (e) => {
  this.decodeChunk(e.data);
};
// Ask for a chunk every 100 ms.
const timesliceMs = 100;
this.mediaRecorder.start(timesliceMs);

This gives me a callback 10 times a second with new data. All good so far.

The data is encoded, so I use audioCtx.decodeAudioData to process it:

// Reads a blob into an ArrayBuffer and decodes it once loading has finished.
let fileReader = new FileReader();
fileReader.onloadend = () => {
  const encodedData = fileReader.result;
  // console.log("Encoded length: " + encodedData.byteLength);
  const onDecoded = (decodedSamples) => {
    // Keep only the part of channel 0 that follows the samples already
    // accounted for by the first chunk.
    const channelData = decodedSamples.getChannelData(0);
    const newSamples = channelData.slice(this.firstChunkSize, decodedSamples.length);
    // The callback which handles the decodedSamples goes here.  All good.
    // Remember how many samples the first chunk decoded to, so they can be
    // skipped in every later result.
    if (this.firstChunkSize == 0) {
      this.firstChunkSize = decodedSamples.length;
    }
  };
  this.audioCtx.decodeAudioData(encodedData, onDecoded);
};

This all works fine too.

Setting up the data for the file reader is where it gets strange:

// The first chunk is remembered and read on its own; every later chunk is
// read with the first chunk placed in front of it (presumably because the
// first chunk carries the header needed for decoding).
const isFirstChunk = !this.firstChunk;
if (isFirstChunk) {
  this.firstChunk = chunk;
}
const parts = isFirstChunk ? [chunk] : [this.firstChunk, chunk];
const blob = new Blob(parts, { 'type': chunk.type });
fileReader.readAsArrayBuffer(blob);

The first chunk works just fine, but the second and later chunks fail to decode unless I combine them with the first chunk. I'm guessing that the first chunk carries a header that is required to decode the data. Because the first chunk is therefore decoded again together with every later chunk, I discard its samples from each subsequent result. See this.firstChunkSize above.

This all executes without error, but the audio that I get back has a vibrato-like effect at 10Hz. A few hypotheses:

  1. I have some simple mistake in my "firstChunkSize" and "slice" logic

  2. The first chunk has some header which is causing the remaining data to be interpreted in a strange way.

  3. There is some strange interaction with some option when creating the audio source (noise cancellation?)


Solution

  • Instead of MediaRecorder, this is easier to do with AudioWorklets. An audio worklet runs in its own thread and is given raw samples as a Float32Array. The downside is that audio worklets must be defined as external modules. So, for example:

    // my-worklet.js
    /**
     * Audio worklet processor that reports the shape of the first input block
     * that actually carries audio (channel count and samples per channel) to
     * the main thread through its message port, and is silent afterwards.
     */
    class MyWorklet extends AudioWorkletProcessor {
        // True until the first block containing audio has been reported.
        first = true;

        /**
         * Invoked for every block of audio delivered to the processor.
         *
         * @param {Float32Array[][]} inputs - inputs[n][m] holds the samples of
         *     channel m of input n.
         * @returns {boolean} Always true, so the processor stays alive.
         */
        process(inputs) {
            if (this.first) {
                const channels = inputs[0] ?? [];
                const samples = channels[0];
                // An input with nothing connected is delivered with zero
                // channels; wait for real audio instead of throwing on
                // `samples.length`.
                if (samples === undefined) {
                    return true;
                }
                this.port.postMessage(`channels: ${channels.length}`);
                this.port.postMessage(`samples: ${samples.length}`);
                this.first = false;
            }
            return true;
        }
    }

    registerProcessor('my-worklet', MyWorklet);
    
    // index.js
    /**
     * Adds a "start" button to the #main element; clicking it runs start().
     */
    function main() {
        // Not allowed to get microphone before first user input.
        const mainDiv = document.getElementById('main');
        // Build the button as a DOM node. `innerHTML +=` would re-parse #main
        // and recreate its existing children, dropping their listeners/state.
        const startButton = document.createElement('button');
        startButton.id = 'start';
        startButton.textContent = 'start';
        startButton.onclick = start;
        mainDiv.append(startButton);
    }
    
    /**
     * Opens the microphone, routes it into the 'my-worklet' processor for five
     * seconds while logging every message the processor posts, then releases
     * the microphone and the audio context.
     *
     * @returns {Promise<void>} Settles once the resources have been released;
     *     rejects if the microphone or the worklet module cannot be obtained.
     */
    async function start() {
        const audioContext = new AudioContext();
        let stream;
        try {
            stream = await navigator.mediaDevices.getUserMedia({ audio: {
                // echo cancellation is on by default.
                echoCancellation: false,
            }});
            const micNode = audioContext.createMediaStreamSource(stream);
            await audioContext.audioWorklet.addModule('my-worklet.js');
            const worklet = new AudioWorkletNode(audioContext, 'my-worklet');
            // Install the handler before audio starts flowing into the worklet.
            worklet.port.onmessage = (ev) => console.log(ev.data);
            micNode.connect(worklet);
            await new Promise(r => setTimeout(r, 5_000));
            micNode.disconnect(worklet);
        } finally {
            // Release the microphone and the audio context even when a step
            // above failed; otherwise the capture stays active.
            stream?.getTracks().forEach((track) => track.stop());
            await audioContext.close();
        }
    }
    

    This outputs:

    channels: 1
    samples: 128
    

    The 'samples' array contains the raw samples.