I am interested in using the AudioEncoder Web API to produce audio chunks that a standard Opus decoder can consume. For example, I can do this:
const stream = await navigator.mediaDevices.getUserMedia({audio: {}});
const audioChunks = [];
const rec = new MediaRecorder(stream, {rate: 16000});
rec.ondataavailable = e => {
    audioChunks.push(e.data);
    console.log(e.data);
};
This will usually give me the data only when I stop recording. Alternatively, I can do:
const context = new AudioContext({sampleRate: 16000});
const source = context.createMediaStreamSource(stream);
const bufferLength = 1280;
const node = context.createScriptProcessor(bufferLength, 2, 2);
node.onaudioprocess = (e) => {
    const data = e.inputBuffer.getChannelData(0);
    console.log(data);
};
source.connect(node);
node.connect(context.destination);
That is more appropriate for my application because it gives a chunk of data every time the buffer fills; in this case 1280 samples at 16 kHz correspond to 80 ms.
How can I record the input in a browser that supports AudioEncoder and, as I record, produce Opus frames that I can then decode with a standard Opus decoder?
Putting the pieces together:

- Create an AudioEncoder and configure it with {codec: 'opus'}, the desired sample rate and a channel count of 1.
- Create an AudioContext with {sampleRate, latencyHint: 'interactive'}. The latency hint 'interactive' gives low latency and is the default option; the other options are 'balanced' and 'playback', which accept higher latency to get lower power consumption.
- Connect the microphone stream to the context with audioContext.createMediaStreamSource and route it into an audioContext.createMediaStreamDestination with its channelCount set to 1.
- Wrap the destination's audio track in a MediaStreamTrackProcessor and feed every AudioData frame it produces to the encoder.

A minimal sketch of that chain follows; after it, if you are interested, comes a full code snippet that (by the time I write this answer) will fail here due to DOMException: Invalid security origin.
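(The sketch assumes a browser that exposes WebCodecs and MediaStreamTrackProcessor; onOpusPacket is just a placeholder for whatever consumes the encoded bytes.)

// Microphone -> AudioContext -> MediaStreamTrackProcessor -> AudioEncoder.
async function captureOpus(sampleRate = 16000, onOpusPacket = console.log) {
  const mic = await navigator.mediaDevices.getUserMedia({audio: true});
  const audioContext = new AudioContext({sampleRate, latencyHint: 'interactive'});
  const source = audioContext.createMediaStreamSource(mic);
  const destination = audioContext.createMediaStreamDestination();
  destination.channelCount = 1;
  source.connect(destination);
  const encoder = new AudioEncoder({
    output: (chunk, metadata) => {
      // Each output chunk carries one encoded Opus packet.
      const packet = new ArrayBuffer(chunk.byteLength);
      chunk.copyTo(packet);
      onOpusPacket(packet, chunk.timestamp, metadata);
    },
    error: console.error
  });
  encoder.configure({codec: 'opus', sampleRate, numberOfChannels: 1});
  // Pull raw AudioData frames from the destination track and feed the encoder.
  const processor = new MediaStreamTrackProcessor({
    track: destination.stream.getAudioTracks()[0]
  });
  processor.readable.pipeTo(new WritableStream({
    write(audioData) {
      encoder.encode(audioData);
      audioData.close();
    }
  }));
}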
/**
* Fill a dropdown with audio input options
* @param {HTMLSelectElement} select
*/
async function enumerateAudioDevices(select){
while (select.firstChild) {
select.removeChild(select.firstChild);
}
for(const deviceInfo of await navigator.mediaDevices.enumerateDevices()){
const option = document.createElement('option');
option.value = deviceInfo.deviceId;
if (deviceInfo.kind === 'audioinput') {
option.text = deviceInfo.label || `microphone ${select.length + 1}`;
select.appendChild(option);
}
}
}
/**
* Fill a dropdown with common sample rate options
* @param {HTMLSelectElement} select
*/
function fillCommonSampleRate(select){
while (select.firstChild) {
select.removeChild(select.firstChild);
}
for(const [value, label] of [
[16000, '16kHz'],
[24000, '24kHz'],
[32000, '32kHz'],
[44100, '44.1kHz'],
[48000, '48kHz']
]){
const option = document.createElement('option');
option.value = value;
option.text = label;
select.appendChild(option);
}
}
class CapturePipeline {
constructor(sourceId, codec='opus', sampleRate=16000){
this.sampleRate = sampleRate;
this.codec = codec;
this.sourceId = sourceId;
/**
* @type {(data: AudioData) => any}
*/
this.onrawdata = null;
/**
* @type {(chunk: EncodedAudioChunk, metadata: EncodedAudioChunkMetadata) => any}
*/
this.onencoded = null;
}
async connect(){
const mic = navigator.mediaDevices.getUserMedia(this.sourceId ? {
audio: {deviceId: {exact: this.sourceId}}
} : {audio: true})
/**
* @type {AudioContext}
*/
this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: this.sampleRate,
latencyHint: 'interactive'
})
this.mic = await mic;
/**
* @type {MediaStreamAudioSourceNode}
*/
this.source = this.audioContext.createMediaStreamSource(this.mic)
/**
* @type {MediaStreamAudioDestinationNode}
*/
this.destination = this.audioContext.createMediaStreamDestination()
this.destination.channelCount = 1;
this.source.connect(this.destination)
/**
* @type {AudioEncoder}
*/
this.encoder = new AudioEncoder({
output: this.handleEncodedData.bind(this),
error: this.handleEncodingError.bind(this)
})
this.encoder.configure({
codec: this.codec,
numberOfChannels: 1,
sampleRate: this.sampleRate
})
/**
* @type {MediaStreamTrackProcessor}
*/
this.audioTrackProcessor = new MediaStreamTrackProcessor({
track: this.destination.stream.getAudioTracks()[0]
})
this.audioTrackProcessor.readable.pipeTo(new WritableStream({
write: this.handleRawData.bind(this)
}))
}
disconnect(){
this.source.disconnect();
// Release the hardware and the encoder, not just our references to them.
this.mic.getTracks().forEach(track => track.stop());
this.encoder.close();
this.audioContext.close();
delete this.audioTrackProcessor;
delete this.encoder;
delete this.destination;
delete this.mic;
delete this.source;
}
/**
* @param {EncodedAudioChunk} chunk
* @param {EncodedAudioChunkMetadata} metadata
*/
handleEncodedData(chunk, metadata){
if(this.onencoded){
this.onencoded(chunk, metadata)
}
// Copy the encoded Opus packet out of the chunk; these bytes are what a
// decoder (or the network) would receive.
const data = new ArrayBuffer(chunk.byteLength)
chunk.copyTo(data);
}
handleEncodingError(e){
console.log(e);
}
/**
* @param {AudioData} audioData
*/
handleRawData(audioData){
if(this.onrawdata){
this.onrawdata(audioData)
}
this.encoder.encode(audioData)
audioData.close()
}
}
//////////////////////////////////////////////////////////////////////
window.addEventListener('load', setup)
function setup(){
const audioSourceSelector = document.body.querySelector('select#audio-source')
const audioCodecSelector = document.body.querySelector('select#audio-codec')
const sampleFrequencySelector = document.body.querySelector('select#sample-frequency')
const startRecordingBtn = document.querySelector('button#start-recording')
const volumeBar = document.querySelector('div#volume-bar')
const encodedLengthBar = document.body.querySelector('div#encoded-length')
const rawLengthBar = document.body.querySelector('div#raw-length');
/**
* @type {CapturePipeline}
*/
let pipeline = null;
let rawLength = 0;
let encodedLength = 0;
let audioSampleArray = new Float32Array(0)
setInterval(() => {
if(rawLength > 0 && encodedLength > 0){
const ref = Math.max(encodedLength, rawLength)
encodedLengthBar.style.width = (90 * encodedLength / ref).toFixed(2) + '%';
encodedLengthBar.textContent = (encodedLength / 1024).toFixed(1) + 'kB'
rawLengthBar.style.width = (90 * rawLength / ref).toFixed(2) + '%';
rawLengthBar.textContent = (rawLength / 1024).toFixed(1) + 'kB'
}
}, 250)
enumerateAudioDevices(audioSourceSelector)
fillCommonSampleRate(sampleFrequencySelector)
for(const select of [
audioSourceSelector,
audioCodecSelector,
sampleFrequencySelector]
) {
select.addEventListener('change', e => {
if(pipeline)pipeline.disconnect();
pipeline = null;
})
}
startRecordingBtn.addEventListener('click', async e => {
pipeline = new CapturePipeline(
audioSourceSelector.value,
audioCodecSelector.value,
+sampleFrequencySelector.value
)
pipeline.onrawdata = (audioData) => {
// 2 bytes per frame: size of the equivalent 16-bit mono PCM stream.
rawLength += audioData.numberOfFrames * 2;
if(audioData.numberOfFrames > audioSampleArray.length){
audioSampleArray = new Float32Array(audioData.numberOfFrames)
}
audioData.copyTo(audioSampleArray, {planeIndex: 0});
const samples = audioSampleArray.subarray(0, audioData.numberOfFrames);
const rms = Math.sqrt(samples
.map(x => x*x)
.reduce((a, b) => a + b, 0) / samples.length);
volumeBar.style.width = (rms * 500) + 'px'
}
pipeline.onencoded = (chunk) => {
encodedLength += chunk.byteLength
}
await pipeline.connect()
})
}
/* Unnecessary, but if it looks better, why not? */
#banner-message {
background: #fff;
border-radius: 4px;
padding: 20px;
font-size: 25px;
text-align: center;
transition: all 0.2s;
margin: 0 auto;
width: 300px;
}
button {
background: #0084ff;
border: none;
border-radius: 5px;
padding: 8px 14px;
font-size: 15px;
color: #fff;
}
#banner-message.alt {
background: #0084ff;
color: #fff;
margin-top: 40px;
width: 200px;
}
#banner-message.alt button {
background: #fff;
color: #000;
}
.horizontal-heat {
width: 500px;
height: 10px;
background: linear-gradient(to right, green, yellow, red);
}
div.mask {
width:500px; height: 10px; overflow:hidden;
}
div.plot-bar {
background-color:#40c4ff;
border: 1px solid #404080;
overflow: visible;
white-space: pre;
}
<div>
<form>
<fieldset><legend>RECORD AUDIO</legend>
<select id="audio-source"></select>
<select id="sample-frequency"></select>
<select id="audio-codec">
<option value="opus" selected>Opus</option>
<option value="vorbis">Vorbis</option>
<option value="mp3">MP3</option>
<option value="alaw">A-law PCM</option>
<option value="ulaw">μ-law PCM</option>
<option value="pcm">Linear PCM</option>
</select>
</fieldset>
</form>
<button id="start-recording">start recording</button>
<hr>
Volume
<div id='volume-bar' class="mask">
<div class="horizontal-heat">
</div>
</div>
<div>
Encoded length
<div id="encoded-length" class="plot-bar"></div>
s16 PCM Wave length
<div id="raw-length" class="plot-bar" style="width: 90%"></div>
</div>
</div>
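As for decoding: each EncodedAudioChunk coming out of this pipeline should hold one bare Opus packet, so a standard Opus decoder can consume them packet by packet. The quickest in-browser check is a WebCodecs AudioDecoder; the sketch below assumes it replaces the byte-counting onencoded handler on the pipeline instance created in the click handler above:

// Decode the encoded chunks in-page with a WebCodecs AudioDecoder.
const decoder = new AudioDecoder({
  output: (audioData) => {
    // Decoded PCM comes back as AudioData objects.
    console.log('decoded', audioData.numberOfFrames, 'frames at', audioData.sampleRate, 'Hz');
    audioData.close();
  },
  error: console.error
});
pipeline.onencoded = (chunk, metadata) => {
  // The first chunk's metadata carries a decoderConfig that can be reused as-is.
  if (metadata && metadata.decoderConfig && decoder.state === 'unconfigured') {
    decoder.configure(metadata.decoderConfig);
  }
  if (decoder.state === 'configured') {
    decoder.decode(chunk);
  }
};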