How can I get a continuous stream of samples from the JavaScript Web Audio API?

I'd like to get a continuous stream of audio samples in JavaScript in the browser. The only way I've found to get samples is through the MediaRecorder object (part of the MediaStream Recording API).

I set up my recorder like this:

// Ask the recorder for a specific container/codec.
// NOTE(review): the MIME parameter is normally spelled `codecs`; confirm the
// browser interprets this string the way it is intended.
const options = { mimeType: "audio/webm;codec=raw" };
this.mediaRecorder = new MediaRecorder(stream, options);

// An arrow function keeps `this` pointing at the enclosing object, so every
// recorded chunk is handed to this object's decodeChunk().
this.mediaRecorder.ondataavailable = (event) => {
  this.decodeChunk(event.data);
};

// Deliver the recorded data in slices of this many milliseconds.
const timesliceMs = 100;
this.mediaRecorder.start(timesliceMs);

This gives me a callback 10 times a second with new data. All good so far.

The data is encoded, so I use audioCtx.decodeAudioData to process it:

const fileReader = new FileReader();
fileReader.onloadend = () => {
  const encodedData = fileReader.result;
  // console.log("Encoded length: " + encodedData.byteLength);

  // Runs once the encoded bytes have been decoded into an audio buffer.
  const onDecoded = (decodedSamples) => {
    const totalLength = decodedSamples.length;
    const channelData = decodedSamples.getChannelData(0);
    // Drop the leading samples that came from the re-decoded first chunk;
    // firstChunkSize is still 0 while the first chunk itself is handled.
    const newSamples = channelData.slice(this.firstChunkSize, totalLength);
    // The callback which handles the decodedSamples goes here.  All good.
    if (this.firstChunkSize == 0) {
      // Remember how many samples the first chunk decoded to.
      this.firstChunkSize = totalLength;
    }
  };

  this.audioCtx.decodeAudioData(encodedData, onDecoded);
};

This all works fine too.

Setting up the data for the file reader is where it gets strange:

// The very first chunk is remembered so it can be prepended to every later
// chunk before decoding.
const isFirstChunk = !this.firstChunk;
if (isFirstChunk) {
  this.firstChunk = chunk;
}
const parts = isFirstChunk ? [chunk] : [this.firstChunk, chunk];
const blob = new Blob(parts, { 'type': chunk.type });
fileReader.readAsArrayBuffer(blob);

The first chunk works just fine, but the second and later chunks fail to decode unless I combine them with the first chunk. I'm guessing what is happening here is that the first chunk has a header that is required to decode the data. I remove the samples decoded from the first chunk after decoding them a second time. See this.firstChunkSize above.

This all executes without error, but the audio that I get back has a vibrato-like effect at 10Hz. A few hypotheses:

I have some simple mistake in my "firstChunkSize" and "slice" logic.
The first chunk has some header which is causing the remaining data to be interpreted in a strange way.
There is some strange interaction with some option when creating the audio source (noise cancellation?)

Solution

Instead of MediaRecorder, this is easier to do with AudioWorklets. An audio worklet runs in its own thread and is given raw samples as a Float32Array. The downside is that audio worklets must be defined as external modules. So, for example:

// my-worklet.js
/**
 * Audio worklet processor that reports, once, how many channels and how many
 * samples per channel it is handed, by posting two messages on its port.
 */
class MyWorklet extends AudioWorkletProcessor {
    // True until the first block that actually contains samples was reported.
    first = true;

    /**
     * Inspects one block of input audio.
     *
     * @param {Float32Array[][]} inputs - inputs[0] is the list of channels of
     *     the first input; each channel holds that block's raw samples.
     * @returns {boolean} Always true.
     */
    process(inputs) {
        if (this.first) {
            const [channels = []] = inputs;
            const [samples] = channels;
            // An input that has nothing connected to it carries no channels.
            // Wait for a block with real samples instead of throwing on
            // `samples.length` and losing the one-shot report.
            if (samples !== undefined) {
                this.port.postMessage(`channels: ${channels.length}`);
                this.port.postMessage(`samples: ${samples.length}`);
                this.first = false;
            }
        }
        return true;
    }
}

registerProcessor('my-worklet', MyWorklet);

// index.js
/**
 * Adds a "start" button to the #main element and makes a click on it call
 * start().
 */
function main() {
    // Not allowed to get microphone before first user input.
    const mainDiv = document.getElementById('main');
    // Create the button as a DOM node rather than appending markup to
    // innerHTML: `innerHTML +=` re-parses everything already inside #main,
    // replacing the existing child nodes and dropping their listeners.
    const startButton = document.createElement('button');
    startButton.id = 'start';
    startButton.textContent = 'start';
    startButton.onclick = start;
    mainDiv.appendChild(startButton);
}

/**
 * Captures the microphone for five seconds, routing it into the 'my-worklet'
 * audio worklet and logging every message the worklet posts.
 *
 * The microphone tracks are stopped and the audio context is closed when the
 * capture ends, whether it finished normally or failed part-way.
 *
 * @returns {Promise<void>} Settles once capture has ended and cleanup is done.
 */
async function start() {
    const audioContext = new AudioContext();
    let stream;
    try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: {
            // echo cancellation is on by default.
            echoCancellation: false,
        }});
        const micNode = audioContext.createMediaStreamSource(stream);
        await audioContext.audioWorklet.addModule('my-worklet.js');
        const worklet = new AudioWorkletNode(audioContext, 'my-worklet');
        micNode.connect(worklet);
        worklet.port.onmessage = (ev) => console.log(ev.data);
        await new Promise((resolve) => setTimeout(resolve, 5_000));
        micNode.disconnect(worklet);
    } finally {
        // Nothing outside this function holds the stream or the context, so
        // release them here; otherwise the microphone stays open.
        stream?.getTracks().forEach((track) => track.stop());
        await audioContext.close();
    }
}

This outputs:

channels: 1
samples: 128

The 'samples' array contains the raw samples.