rust audio hound

Resample CPAL to 16 kHz


I'm working on an audio project where I need to record audio from a microphone and resample it to 16 kHz mono. I plan to use the Rubato library along with CPAL for this task, and I'll be feeding the resampled audio into whisper.cpp.

Could someone provide guidance on the best way to handle the resampling process? Specifically, I am unsure about managing the fixed buffer size of the resampler. Should I process the audio data in chunks to accommodate this?

An example or any detailed directions on setting up and using Rubato with CPAL for this purpose would be greatly appreciated.

use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{FromSample, Sample};
use eyre::{bail, Result};
use rubato::{
    Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction,
};
use std::fs::File;
use std::io::BufWriter;
use std::sync::{Arc, Mutex};

/// Records roughly three seconds from the default input device into
/// `recorded.wav` inside the directory named by `CARGO_MANIFEST_DIR` at
/// compile time.
///
/// # Errors
///
/// Returns an error when no input device or default input configuration is
/// available, when the WAV writer or the resampler cannot be created, when the
/// device reports a sample format other than `i8`/`i16`/`i32`/`f32`, or when
/// building/starting the stream or finalizing the file fails.
fn main() -> Result<()> {
    let host = cpal::default_host();

    // Set up the input device and stream with the default input config.
    // A missing microphone is an environmental condition rather than a bug,
    // so it is reported through the returned `Result` instead of panicking.
    let device = match host.default_input_device() {
        Some(device) => device,
        None => bail!("failed to find input device"),
    };

    println!("Input device: {}", device.name()?);

    let config = match device.default_input_config() {
        Ok(config) => config,
        Err(err) => bail!("Failed to get default input config: {err}"),
    };
    println!("Default input config: {:?}", config);

    // The WAV file we're recording to.
    const PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/recorded.wav");
    let spec = wav_spec_from_config(&config);
    let writer = hound::WavWriter::create(PATH, spec)?;
    let writer = Arc::new(Mutex::new(Some(writer)));

    let params = SincInterpolationParameters {
        sinc_len: 256,
        f_cutoff: 0.95,
        interpolation: SincInterpolationType::Linear,
        oversampling_factor: 256,
        window: WindowFunction::BlackmanHarris2,
    };
    // Output rate / input rate, targeting 16 kHz.
    let resample_ratio = 16_000.0 / f64::from(config.sample_rate().0);
    // Size the resampler for the channel count the device actually delivers
    // instead of assuming stereo.
    let mut resampler = SincFixedIn::<f64>::new(
        resample_ratio,
        2.0,
        params,
        1024,
        usize::from(config.channels()),
    )?;
    // NOTE(review): `write_input_data` does not use the resampler yet, so the
    // samples are written at the device rate even though the WAV header
    // declares 16 kHz.

    println!("Begin recording...");

    // The stream callback gets its own handle to the shared writer.
    let writer_2 = writer.clone();

    let err_fn = move |err| {
        eprintln!("an error occurred on stream: {}", err);
    };

    // Only one arm runs, so moving `config`, `writer_2` and `resampler` into
    // each arm's closure is fine.
    let stream = match config.sample_format() {
        cpal::SampleFormat::I8 => device.build_input_stream(
            &config.into(),
            move |data, _: &_| write_input_data::<i8, i8>(data, &writer_2, &mut resampler),
            err_fn,
            None,
        )?,
        cpal::SampleFormat::I16 => device.build_input_stream(
            &config.into(),
            move |data, _: &_| write_input_data::<i16, i16>(data, &writer_2, &mut resampler),
            err_fn,
            None,
        )?,
        cpal::SampleFormat::I32 => device.build_input_stream(
            &config.into(),
            move |data, _: &_| write_input_data::<i32, i32>(data, &writer_2, &mut resampler),
            err_fn,
            None,
        )?,
        cpal::SampleFormat::F32 => device.build_input_stream(
            &config.into(),
            move |data, _: &_| write_input_data::<f32, f32>(data, &writer_2, &mut resampler),
            err_fn,
            None,
        )?,
        sample_format => {
            bail!("Unsupported sample format '{sample_format}'")
        }
    };

    stream.play()?;

    // Let recording go for roughly three seconds.
    std::thread::sleep(std::time::Duration::from_secs(3));
    // Drop the stream before taking the writer out of the shared handle so
    // the file can be finalized.
    drop(stream);
    writer.lock().unwrap().take().unwrap().finalize()?;
    println!("Recording {} complete!", PATH);
    Ok(())
}

/// Maps a cpal sample format onto the matching hound sample format:
/// `Float` when `format.is_float()` is true, `Int` otherwise.
fn sample_format(format: cpal::SampleFormat) -> hound::SampleFormat {
    match format.is_float() {
        true => hound::SampleFormat::Float,
        false => hound::SampleFormat::Int,
    }
}

/// Builds the WAV header description for a recording made with `config`.
///
/// Channel count, bit depth and int/float encoding follow the device
/// configuration; the sample rate is always declared as 16 kHz.
fn wav_spec_from_config(config: &cpal::SupportedStreamConfig) -> hound::WavSpec {
    let format = config.sample_format();
    // `sample_size()` is in bytes; the WAV header wants bits.
    let bits_per_sample = (format.sample_size() * 8) as u16;

    hound::WavSpec {
        channels: config.channels() as u16,
        sample_rate: 16_000, // Write as 16khz always
        bits_per_sample,
        sample_format: sample_format(format),
    }
}

/// Shared, lockable handle to the WAV writer.
///
/// The `Option` lets `main` `take()` the writer out in order to finalize it,
/// while the stream callback holds its own clone of the `Arc`.
type WavWriterHandle = Arc<Mutex<Option<hound::WavWriter<BufWriter<File>>>>>;

/// Stream callback: converts every incoming sample from `T` to `U` and appends
/// it to the WAV file.
///
/// The write is best effort: if the writer lock cannot be acquired right now,
/// or the writer has already been taken out of the handle, the buffer is
/// dropped; individual write errors are ignored as well.
///
/// NOTE(review): `resampler` is accepted but never used, so the samples are
/// written at whatever rate the device delivers them.
fn write_input_data<T, U>(input: &[T], writer: &WavWriterHandle, resampler: &mut SincFixedIn<f64>)
where
    T: Sample,
    U: Sample + hound::Sample + FromSample<T>,
{
    // Never block inside the audio callback: skip the buffer when the lock is busy.
    let mut guard = match writer.try_lock() {
        Ok(guard) => guard,
        Err(_) => return,
    };
    let wav = match guard.as_mut() {
        Some(wav) => wav,
        None => return,
    };

    for raw in input.iter().copied() {
        let _ = wav.write_sample(U::from_sample(raw));
    }
}


Solution

  • I got this working with the `samplerate` crate instead of Rubato:

    use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
    use cpal::{FromSample, Sample};
    use eyre::{bail, Result};
    use std::fs::File;
    use std::io::BufWriter;
    use std::sync::{Arc, Mutex};
    
    /// Records roughly three seconds from the default input device and writes
    /// them to `recorded.wav` (inside the directory named by
    /// `CARGO_MANIFEST_DIR` at compile time) with a 16 kHz, 16-bit, mono header.
    ///
    /// # Errors
    ///
    /// Returns an error when no input device or default input configuration
    /// is available, when the WAV file cannot be created or finalized, when
    /// the device reports a sample format other than `i8`/`i16`/`i32`/`f32`,
    /// or when building/starting the stream fails.
    fn main() -> Result<()> {
        let host = cpal::default_host();

        // Set up the input device and stream with the default input config.
        // A missing microphone is an environmental condition rather than a
        // bug, so it is reported through the returned `Result` instead of
        // panicking.
        let device = match host.default_input_device() {
            Some(device) => device,
            None => bail!("failed to find input device"),
        };

        println!("Input device: {}", device.name()?);

        let config = match device.default_input_config() {
            Ok(config) => config,
            Err(err) => bail!("Failed to get default input config: {err}"),
        };
        println!("Default input config: {:?}", config);

        // The WAV file we're recording to.
        const PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/recorded.wav");

        // 16KHZ 16bit mono
        let spec = hound::WavSpec {
            channels: 1,
            sample_rate: 16_000, // Write as 16khz always
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };
        let writer = hound::WavWriter::create(PATH, spec)?;
        let writer = Arc::new(Mutex::new(Some(writer)));

        println!("Begin recording...");

        // The stream callback gets its own handle to the shared writer.
        let writer_2 = writer.clone();

        let err_fn = move |err| {
            eprintln!("an error occurred on stream: {}", err);
        };

        // Read these before `config` is consumed by `into()` below.
        let sample_rate = config.sample_rate().0;
        let channels = config.channels();
        // Only one arm runs, so moving `config` and `writer_2` into each
        // arm's closure is fine.
        let stream = match config.sample_format() {
            cpal::SampleFormat::I8 => device.build_input_stream(
                &config.into(),
                move |data, _: &_| write_input_data::<i8, i8>(data, &writer_2, sample_rate, channels),
                err_fn,
                None,
            )?,
            cpal::SampleFormat::I16 => device.build_input_stream(
                &config.into(),
                move |data, _: &_| write_input_data::<i16, i16>(data, &writer_2, sample_rate, channels),
                err_fn,
                None,
            )?,
            cpal::SampleFormat::I32 => device.build_input_stream(
                &config.into(),
                move |data, _: &_| write_input_data::<i32, i32>(data, &writer_2, sample_rate, channels),
                err_fn,
                None,
            )?,
            cpal::SampleFormat::F32 => device.build_input_stream(
                &config.into(),
                move |data, _: &_| write_input_data::<f32, f32>(data, &writer_2, sample_rate, channels),
                err_fn,
                None,
            )?,
            sample_format => {
                bail!("Unsupported sample format '{sample_format}'")
            }
        };

        stream.play()?;

        // Let recording go for roughly three seconds.
        std::thread::sleep(std::time::Duration::from_secs(3));
        // Drop the stream before taking the writer out of the shared handle
        // so the file can be finalized.
        drop(stream);
        writer.lock().unwrap().take().unwrap().finalize()?;
        println!("Recording {} complete!", PATH);
        Ok(())
    }
    
    /// Shared, lockable handle to the WAV writer.
    ///
    /// The `Option` lets `main` `take()` the writer out in order to finalize
    /// it, while the stream callback holds its own clone of the `Arc`.
    type WavWriterHandle = Arc<Mutex<Option<hound::WavWriter<BufWriter<File>>>>>;
    
    /// Resamples interleaved `f32` audio from `sample_rate0` Hz to
    /// `sample_rate` Hz using `samplerate`'s `SincBestQuality` converter.
    ///
    /// `channels` is the number of interleaved channels in `data`.
    ///
    /// Returns an empty vector when `data` is empty, when `channels` is zero,
    /// or when the conversion fails (the failure is reported on stderr so it
    /// is not lost silently). When both rates are equal the input is returned
    /// unchanged without running the converter.
    pub fn audio_resample(
        data: &[f32],
        sample_rate0: u32,
        sample_rate: u32,
        channels: u16,
    ) -> Vec<f32> {
        use samplerate::{convert, ConverterType};

        // Nothing to convert.
        if data.is_empty() || channels == 0 {
            return Vec::new();
        }
        // Identical rates: a copy is exact and skips the expensive sinc filter.
        if sample_rate0 == sample_rate {
            return data.to_vec();
        }

        convert(
            sample_rate0 as _,
            sample_rate as _,
            channels as _,
            ConverterType::SincBestQuality,
            data,
        )
        .unwrap_or_else(|err| {
            eprintln!("audio resampling failed: {err}");
            Vec::new()
        })
    }
    
    /// Down-mixes interleaved stereo samples to mono by averaging each
    /// left/right pair.
    ///
    /// # Panics
    ///
    /// Panics when `stereo_data` has an odd number of samples, since that
    /// cannot be valid interleaved stereo.
    pub fn stereo_to_mono(stereo_data: &[f32]) -> Vec<f32> {
        assert_eq!(
            stereo_data.len() % 2,
            0,
            "Stereo data length should be even."
        );

        stereo_data
            .chunks_exact(2)
            .map(|frame| (frame[0] + frame[1]) / 2.0)
            .collect()
    }
    
    /// Averages every interleaved frame of `channels` samples into one mono
    /// sample.
    ///
    /// Input with zero or one channel is returned as-is. A trailing partial
    /// frame (fewer than `channels` samples) is ignored rather than indexed
    /// out of bounds.
    fn downmix_to_mono(interleaved: &[f32], channels: usize) -> Vec<f32> {
        if channels <= 1 {
            return interleaved.to_vec();
        }
        interleaved
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
            .collect()
    }

    /// Stream callback: converts one captured buffer to 16 kHz, 16-bit mono
    /// and appends it to the WAV file.
    ///
    /// * `input` - interleaved samples as delivered by the device.
    /// * `writer` - shared handle to the WAV writer.
    /// * `sample_rate` - sample rate of `input` in Hz.
    /// * `channels` - number of interleaved channels in `input`.
    ///
    /// The write is best effort: if the writer lock cannot be acquired right
    /// now, or the writer has already been taken out of the handle, the
    /// buffer is dropped; individual write errors are ignored as well.
    ///
    /// NOTE(review): every buffer is resampled on its own, so no converter
    /// state carries over between callbacks. A streaming converter would
    /// presumably avoid artifacts at buffer boundaries; confirm against the
    /// `samplerate` documentation.
    fn write_input_data<T, U>(input: &[T], writer: &WavWriterHandle, sample_rate: u32, channels: u16)
    where
        T: Sample,
        U: Sample + hound::Sample + FromSample<T>,
    {
        // Convert the input samples to f32
        let samples: Vec<f32> = input
            .iter()
            .map(|s| s.to_float_sample().to_sample())
            .collect();

        // Mix down first, using the real channel count, so the resampler only
        // has to process a single channel.
        let mono = downmix_to_mono(&samples, usize::from(channels));
        let resampled = audio_resample(&mono, sample_rate, 16000, 1);

        // Write the mono data to the WAV file
        if let Ok(mut guard) = writer.try_lock() {
            if let Some(writer) = guard.as_mut() {
                for &sample in &resampled {
                    // Float-to-int `as` casts saturate, so out-of-range
                    // samples clip instead of wrapping.
                    let pcm = (sample * f32::from(i16::MAX)).round() as i16;
                    writer.write_sample(pcm).ok();
                }
            }
        }
    }