ios, swift, signal-processing

Real-time audio processing on iOS


I'm trying to get sound from the microphone, process it with some function, and then output the processed sound to the speakers. I need to be able to process buffers of 1024 samples, but for now I only get choppy sound. Is there a better way to process sound in real time than using installTap?
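
For reference, the audio session can at least be asked for an I/O buffer of roughly 1024 frames; this is only a hint and the hardware may still deliver a different size. A minimal sketch (the helper name is made up):

    private func requestPreferred1024FrameBuffer() throws {
        let session = AVAudioSession.sharedInstance()
        try session.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
        // Ask for ~1024 frames per I/O cycle at the current hardware sample rate.
        // The system treats this as a preference and may pick another size.
        try session.setPreferredIOBufferDuration(1024.0 / session.sampleRate)
        try session.setActive(true)
        print("Granted IO buffer duration: \(session.ioBufferDuration) s")
    }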

In the code example below I don't do any processing, but I still get choppy sound.

    private func setupAudioEngine() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
            try audioSession.setActive(true)
        } catch {
            errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
            print(errorMessage ?? "")
            return
        }
        
        // Get the input format
        let inputNode = audioEngine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)
        
        // Attach nodes
        audioEngine.attach(mixerNode)
        audioEngine.attach(playerNode)
        
        // Query the mixer's output format (the result is currently unused)
        _ = mixerNode.outputFormat(forBus: 0)
        
        // Connect input to mixer
        audioEngine.connect(inputNode, to: mixerNode, format: nil)
        
        // Connect mixer to output
        audioEngine.connect(mixerNode, to: audioEngine.mainMixerNode, format: nil)
        
        // Connect player directly to the output node
        audioEngine.connect(playerNode, to: audioEngine.outputNode, format: nil)
        
        let format = AVAudioFormat(
            standardFormatWithSampleRate: inputFormat.sampleRate,
            channels: 2
        )
        
        // Install tap on the input node to process audio
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: format) { [weak self] (buffer, audioTime) in
            self?.scheduleProcessedBuffer(buffer)
        }
        
        // Prepare the engine before starting
        audioEngine.prepare()
    }
    
    
    private func scheduleProcessedBuffer(_ buffer: AVAudioPCMBuffer) {
        if playerNode.isPlaying {
            playerNode.scheduleBuffer(buffer, at: nil, options: .interrupts) {
                // Optional: Callback when buffer finishes playing
            }
        }
    }

EDIT: I got this code running in real time (AVAudioSinkNode to AVAudioSourceNode); the only problem is that I need buffers of 1024 samples, and I can't enforce the buffer size (see the chunking sketch after this code).

import SwiftUI
import AVFoundation
import Combine

class CircularAudioFrameQueue {
    private var queue: [AVAudioPCMBuffer]
    private var headIndex = 0
    private var tailIndex = 0
    private let maxSize: Int
    private let format: AVAudioFormat
    
    init(format: AVAudioFormat, size: Int = 10) {
        self.maxSize = size
        self.format = format
        self.queue = (0..<size).compactMap { _ in AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 1024) }
    }
    
    // Enqueue a buffer
    func enqueue(_ buffer: AVAudioPCMBuffer) {
        queue[headIndex] = buffer
        headIndex = (headIndex + 1) % maxSize
        if headIndex == tailIndex {
            tailIndex = (tailIndex + 1) % maxSize  // Overwrite oldest frame if buffer is full
        }
    }
    
    // Dequeue a buffer
    func dequeue() -> AVAudioPCMBuffer? {
        guard tailIndex != headIndex else { return nil }  // Empty queue check
        let buffer = queue[tailIndex]
        tailIndex = (tailIndex + 1) % maxSize
        return buffer
    }
}

class AudioRecorderManager: NSObject, ObservableObject {
    private var audioEngine = AVAudioEngine()
    private var noiseCanceller: ONNXNoiseCanceller?
    private var frameQueue: CircularAudioFrameQueue?
    
    @Published var isRecording = false
    @Published var errorMessage: String?
    
    override init() {
        super.init()
        
        // Initialize noise cancellation model
        noiseCanceller = ONNXNoiseCanceller()
                
        setupAudioEngine()
    }
    
    private func setupAudioEngine() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
            try audioSession.setActive(true)
        } catch {
            errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
            print(errorMessage ?? "")
            return
        }
        
        // Get the input format
        let inputNode = audioEngine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)
        
        // Create a consistent format for processing
        guard let processingFormat = AVAudioFormat(
            standardFormatWithSampleRate: 44100,
            channels: 2
        ) else {
            errorMessage = "Failed to create audio format"
            print(errorMessage ?? "")
            return
        }
        frameQueue = CircularAudioFrameQueue(format: processingFormat)
        
        // Create a sink node for low-latency processing
        let sinkNode = AVAudioSinkNode { [weak self] (timestamp, frameCount, audioBufferList) -> OSStatus in
            guard let self = self else { return noErr }
            
            // Get buffer pointer for direct processing
            let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
            // Create a temporary buffer to hold input for noise cancellation
            let pcmBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, frameCapacity: frameCount)!
            pcmBuffer.frameLength = frameCount
        
            // Copy data from audioBufferList to pcmBuffer for noise cancellation processing
            for bufferIndex in 0..<min(ablPointer.count, Int(processingFormat.channelCount)) {
                let inBuffer = ablPointer[bufferIndex]
                let outBuffer = pcmBuffer.floatChannelData?[bufferIndex]
                if let inData = inBuffer.mData?.assumingMemoryBound(to: Float.self),
                   let outData = outBuffer {
                    // Process input data (e.g., noise cancellation)
                    for frame in 0..<Int(frameCount) {
                        outData[frame] = inData[frame] * 1  // Example processing (e.g., gain adjustment)
                    }
                }
            }
            
            // Store processed frames in the circular queue
            self.frameQueue?.enqueue(pcmBuffer)
            
            return noErr
        }
        
        // Create a source node for audio output
        let sourceNode = AVAudioSourceNode { [weak self] (silence, timeStamp, frameCount, audioBufferList) -> OSStatus in
            guard let self = self else { return noErr }
            
            // Pull frames from the circular queue
            if let buffer = self.frameQueue?.dequeue() {
                // Copy frames to the provided buffer
                let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
                
                for bufferIndex in 0..<min(ablPointer.count, Int(buffer.format.channelCount)) {
                    let outBuffer = ablPointer[bufferIndex]
                    if let outData = outBuffer.mData?.assumingMemoryBound(to: Float.self) {
                        // Copy processed frames into the output buffer
                        let pcmChannelData = buffer.floatChannelData?[bufferIndex]
                        if let pcmData = pcmChannelData {
                            for frame in 0..<Int(frameCount) {
                                outData[frame] = pcmData[frame]
                            }
                        }
                    }
                }
            }
            
            return noErr
        }
        
        // Attach nodes and set up connections
        audioEngine.attach(sinkNode)
        audioEngine.attach(sourceNode)
        
        audioEngine.connect(audioEngine.inputNode, to: sinkNode, format: inputFormat)
        audioEngine.connect(sourceNode, to: audioEngine.outputNode, format: inputFormat)
        
    }
}
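
What eventually fixed the buffer-size problem (see the Solution below) was to stop relying on the callback's frame count and repackage the incoming samples into fixed 1024-frame chunks. A minimal sketch of that idea (the FrameAccumulator type and onChunk callback are made-up names, and the Array-based buffering here is not real-time safe; it only illustrates the repackaging):

    // Hypothetical helper: collects mono float samples from callbacks of arbitrary
    // size and emits them in fixed-size chunks (1024 frames by default).
    final class FrameAccumulator {
        private var pending: [Float] = []
        private let chunkSize: Int

        init(chunkSize: Int = 1024) {
            self.chunkSize = chunkSize
        }

        // Append `frameCount` samples from a sink-node callback; `onChunk` is called
        // once for every complete chunk that becomes available.
        func append(_ samples: UnsafePointer<Float>, frameCount: Int, onChunk: ([Float]) -> Void) {
            pending.append(contentsOf: UnsafeBufferPointer(start: samples, count: frameCount))
            while pending.count >= chunkSize {
                onChunk(Array(pending.prefix(chunkSize)))
                pending.removeFirst(chunkSize)
            }
        }
    }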

Solution

  • I found a way to do what I was seeking:

    Input audio goes to a sink node,

    the sink node writes into a circular buffer,

    and a player node reads fixed-size buffers from the circular buffer.

    CODE:

    import SwiftUI
    import AVFoundation
    import Combine
    
    
    class CircularAudioFrameQueue {
        private var buffers: [AVAudioPCMBuffer]
        private var bufferSampleOffsets: [Int]  // Tracks how many samples we've consumed from each buffer
        private var headIndex = 0
        private var tailIndex = 0
        private var totalSamples = 0
        private let maxBuffers: Int
        private let format: AVAudioFormat
        
        init(format: AVAudioFormat, maxBuffers: Int = 100) {
            self.format = format
            self.maxBuffers = maxBuffers
            self.buffers = Array(repeating: AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 0)!, count: maxBuffers)
            self.bufferSampleOffsets = Array(repeating: 0, count: maxBuffers)
        }
        
        // Enqueue a buffer of any size
        func enqueue(_ buffer: AVAudioPCMBuffer) {
            // Create a copy of the buffer to store in the queue
            guard let copy = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: buffer.frameLength) else { return }
            copy.frameLength = buffer.frameLength
            
            // Copy the data
            for channel in 0..<Int(format.channelCount) {
                if let srcData = buffer.floatChannelData?[channel],
                   let destData = copy.floatChannelData?[channel] {
                    for frame in 0..<Int(buffer.frameLength) {
                        destData[frame] = srcData[frame]
                    }
                }
            }
            
            // If the queue is full, remove the oldest buffer(s).
            // (Keep one slot free so headIndex == tailIndex always means "empty".)
            while size >= maxBuffers - 1 {
                removeOldestBuffer()
            }
            
            // Add the new buffer
            buffers[headIndex] = copy
            bufferSampleOffsets[headIndex] = 0
            headIndex = (headIndex + 1) % maxBuffers
            totalSamples += Int(copy.frameLength)
        }
        
        // Dequeue exactly 'requestedSamples' number of samples across potentially multiple buffers
        func dequeue(requestedSamples: Int) -> AVAudioPCMBuffer? {
            guard totalSamples > 0, requestedSamples > 0 else { return nil }
            
            // Create a buffer to hold the requested samples
            let samplesToCopy = min(requestedSamples, totalSamples)
            guard let result = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(samplesToCopy)) else { return nil }
            result.frameLength = AVAudioFrameCount(samplesToCopy)
            
            var samplesCopied = 0
            
            while samplesCopied < samplesToCopy {
                // Check if we still have buffers to process
                if size == 0 {
                    break
                }
                
                let currentBuffer = buffers[tailIndex]
                let currentOffset = bufferSampleOffsets[tailIndex]
                let remainingInBuffer = Int(currentBuffer.frameLength) - currentOffset
                let samplesNeeded = samplesToCopy - samplesCopied
                let samplesToCopyFromThisBuffer = min(remainingInBuffer, samplesNeeded)
                
                // Copy samples from the current buffer
                for channel in 0..<Int(format.channelCount) {
                    if let srcData = currentBuffer.floatChannelData?[channel],
                       let destData = result.floatChannelData?[channel] {
                        for i in 0..<samplesToCopyFromThisBuffer {
                            destData[samplesCopied + i] = srcData[currentOffset + i]
                        }
                    }
                }
                
                samplesCopied += samplesToCopyFromThisBuffer
                totalSamples -= samplesToCopyFromThisBuffer
                
                // Update the offset or remove the buffer if fully consumed
                if currentOffset + samplesToCopyFromThisBuffer >= Int(currentBuffer.frameLength) {
                    // Buffer fully consumed, remove it
                    tailIndex = (tailIndex + 1) % maxBuffers
                } else {
                    // Update offset
                    bufferSampleOffsets[tailIndex] += samplesToCopyFromThisBuffer
                }
            }
            
            return result
        }
        
        // Has enough samples (1024 or more)
        var hasEnoughSamples: Bool {
            return totalSamples >= 1024
        }
        
        // Get the oldest 1024 samples
        func getOldest1024Samples() -> AVAudioPCMBuffer? {
            return dequeue(requestedSamples: 1024)
        }
        
        // Get the oldest 2048 samples
        func getOldest2048Samples() -> AVAudioPCMBuffer? {
            return dequeue(requestedSamples: 2048)
        }
        
        // Get available samples count
        var availableSamples: Int {
            return totalSamples
        }
        
        // Helper method to remove the oldest buffer
        private func removeOldestBuffer() {
            guard size > 0 else { return }
            
            let removedBuffer = buffers[tailIndex]
            let remainingSamples = Int(removedBuffer.frameLength) - bufferSampleOffsets[tailIndex]
            totalSamples -= remainingSamples
            
            tailIndex = (tailIndex + 1) % maxBuffers
        }
        
        // The size (number of buffers) in the queue
        private var size: Int {
            let diff = headIndex - tailIndex
            return diff >= 0 ? diff : diff + maxBuffers
        }
    }
    
    class AudioRecorderManager: NSObject, ObservableObject {
        private var audioEngine = AVAudioEngine()
        private var noiseCanceller: ONNXNoiseCanceller?
        private var frameQueue: CircularAudioFrameQueue?
        private var playerNode = AVAudioPlayerNode()
        private var playbackTimer: Timer?
        
        @Published var isRecording = false
        @Published var isPlaying = false
        @Published var errorMessage: String?
        
        override init() {
            super.init()
            
            // Initialize noise cancellation model
            noiseCanceller = ONNXNoiseCanceller()
                    
            setupAudioEngine()
        }
        
        private func setupAudioEngine() {
            do {
                let audioSession = AVAudioSession.sharedInstance()
                try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
                try audioSession.setActive(true)
            } catch {
                errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
                print(errorMessage ?? "")
                return
            }
            
            // Get the input format
            let inputNode = audioEngine.inputNode
            let inputFormat = inputNode.outputFormat(forBus: 0)
            
            // Create a consistent format for processing
            guard let processingFormat = AVAudioFormat(
                standardFormatWithSampleRate: 48000,
                channels: 2
            ) else {
                errorMessage = "Failed to create audio format"
                print(errorMessage ?? "")
                return
            }
            
            
            // Initialize the frame queue with room for up to 100 buffers
            frameQueue = CircularAudioFrameQueue(format: processingFormat, maxBuffers: 100)
            
            // Create a sink node for low-latency processing
            let sinkNode = AVAudioSinkNode { [weak self] (timestamp, frameCount, audioBufferList) -> OSStatus in
                guard let self = self else { return noErr }
                
                // Get buffer pointer for direct processing
                let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
                // Create a temporary buffer with 2 channels
                let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: processingFormat.sampleRate, channels: 2, interleaved: false)!
                let pcmBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: frameCount)!
                pcmBuffer.frameLength = frameCount
                
                // Copy data from input to both output channels
                if let inBuffer = ablPointer.first,
                   let inData = inBuffer.mData?.assumingMemoryBound(to: Float.self),
                   let leftOutBuffer = pcmBuffer.floatChannelData?[0],
                   let rightOutBuffer = pcmBuffer.floatChannelData?[1] {
                    for frame in 0..<Int(frameCount) {
                        let sample = inData[frame]
                        leftOutBuffer[frame] = sample
                        rightOutBuffer[frame] = sample
                    }
                }
                
                // Store processed frames in the circular queue
                self.frameQueue?.enqueue(pcmBuffer)
                
                return noErr
            }
            
            // Set up the player node
            audioEngine.attach(playerNode)
            audioEngine.attach(sinkNode)
    
            // Connect the input to the sink node
            audioEngine.connect(audioEngine.inputNode, to: sinkNode, format: inputFormat)
            
            // Connect player node to output
            audioEngine.connect(playerNode, to: audioEngine.outputNode, format: processingFormat)
        }
        
        func toggleRecording() {
            guard AVAudioSession.sharedInstance().recordPermission == .granted else {
                requestMicrophonePermission()
                return
            }
            
            if isRecording {
                stopRecording()
            } else {
                startRecording()
            }
        }
        
        func togglePlayback() {
            if isPlaying {
                stopPlayback()
            } else {
                startPlayback()
            }
        }
        
        private func startRecording() {
            do {
                // If engine isn't running, start it
                if !audioEngine.isRunning {
                    try audioEngine.start()
                }
                isRecording = true
                errorMessage = nil
            } catch {
                errorMessage = "Error starting audio engine: \(error.localizedDescription)"
                isRecording = false
                print(errorMessage ?? "")
            }
        }
        
        private func stopRecording() {
            if audioEngine.isRunning && !isPlaying {
                audioEngine.stop()
            }
            isRecording = false
        }
        
        private func startPlayback() {
            do {
                // Make sure engine is running
                if !audioEngine.isRunning {
                    try audioEngine.start()
                }
                
                playerNode.play()
                isPlaying = true
                
                // Start a timer to check for available buffers to play
                playbackTimer = Timer.scheduledTimer(withTimeInterval: 0.001, repeats: true) { [weak self] _ in
                    self?.scheduleNextBuffer()
                }
                
            } catch {
                errorMessage = "Error starting playback: \(error.localizedDescription)"
                print(errorMessage ?? "")
            }
        }
        
        private func stopPlayback() {
            playerNode.stop()
            playbackTimer?.invalidate()
            playbackTimer = nil
            isPlaying = false
            
            if !isRecording && audioEngine.isRunning {
                audioEngine.stop()
            }
        }
        
        private func scheduleNextBuffer() {
            // Only schedule if we have at least 1024 samples available
            guard let frameQueue = frameQueue, frameQueue.hasEnoughSamples else {
                return
            }
            
            // Check if player node needs more buffers
            if !playerNode.isPlaying {
                playerNode.play()
            }
            
            // Get the oldest 1024 samples from the queue
            if let buffer = frameQueue.getOldest1024Samples() {
                // Process the chunk with noise cancellation
                let processedBuffer = noiseCanceller!.processAudioChunk(buffer)
                let finalBuffer = noiseCanceller!.processOutputTensor(processedBuffer!, audioEngine: self.audioEngine, noiseProfile: buffer)

                // Schedule the processed buffer for playback
                playerNode.scheduleBuffer(finalBuffer!, at: nil, options: .interruptsAtLoop) {
                    // Buffer finished playing; try to schedule the next chunk
                    DispatchQueue.main.async {
                        self.scheduleNextBuffer()
                    }
                }
            }
        }
        
        
        private func requestMicrophonePermission() {
            AVAudioSession.sharedInstance().requestRecordPermission { [weak self] granted in
                DispatchQueue.main.async {
                    if granted {
                        self?.toggleRecording()
                    } else {
                        self?.errorMessage = "Microphone permission denied"
                    }
                }
            }
        }
        
        deinit {
            playbackTimer?.invalidate()
            if audioEngine.isRunning {
                audioEngine.stop()
            }
        }
    }
    
    struct AudioRecorderView: View {
        @StateObject private var audioManager = AudioRecorderManager()
        
        var body: some View {
            VStack {
                if let errorMessage = audioManager.errorMessage {
                    Text(errorMessage)
                        .foregroundColor(.red)
                        .padding()
                }
                
                Text(audioManager.isRecording ? "Recording" : "Paused")
                    .foregroundColor(audioManager.isRecording ? .red : .gray)
                    .padding()
                
                Button(action: {
                    audioManager.toggleRecording()
                }) {
                    Text(audioManager.isRecording ? "Stop Recording" : "Start Recording")
                        .foregroundColor(.white)
                        .padding()
                        .background(audioManager.isRecording ? Color.red : Color.blue)
                        .cornerRadius(10)
                }
                .padding()
                
                Button(action: {
                    audioManager.togglePlayback()
                }) {
                    Text(audioManager.isPlaying ? "Stop Playback" : "Start Playback")
                        .foregroundColor(.white)
                        .padding()
                        .background(audioManager.isPlaying ? Color.orange : Color.green)
                        .cornerRadius(10)
                }
            }
        }
    }
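
    For completeness, a minimal host for AudioRecorderView alongside the code above (the RealtimeAudioApp name is made up; the target's Info.plist also needs an NSMicrophoneUsageDescription entry or the microphone-permission prompt won't appear):

    @main
    struct RealtimeAudioApp: App {
        var body: some Scene {
            WindowGroup {
                AudioRecorderView()
            }
        }
    }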