I'm trying to capture sound from the microphone, process it with some function, and then output the processed sound to the speakers. I need to process buffers of 1024 samples, but for now I only get choppy sound. Is there a better way to do real-time processing than using installTap?
In this code example I don't do any processing, but I still get choppy sound.
private func setupAudioEngine() {
    do {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
        try audioSession.setActive(true)
    } catch {
        errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
        print(errorMessage ?? "")
        return
    }

    // Get the input format
    let inputNode = audioEngine.inputNode
    let inputFormat = inputNode.outputFormat(forBus: 0)

    // Attach nodes
    audioEngine.attach(mixerNode)
    audioEngine.attach(playerNode)

    // Set mixer format to match input
    mixerNode.outputFormat(forBus: 0)

    // Connect input to mixer
    audioEngine.connect(inputNode, to: mixerNode, format: nil)
    // Connect mixer to output
    audioEngine.connect(mixerNode, to: audioEngine.mainMixerNode, format: nil)
    // Connect player to mixer (not directly to output)
    audioEngine.connect(playerNode, to: audioEngine.outputNode, format: nil)

    let format = AVAudioFormat(
        standardFormatWithSampleRate: inputFormat.sampleRate,
        channels: 2
    )

    // Install tap on mixer node to process audio
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: format) { [weak self] (buffer, audioTime) in
        self!.scheduleProcessedBuffer(buffer)
    }

    // Prepare the engine before starting
    audioEngine.prepare()
}

private func scheduleProcessedBuffer(_ buffer: AVAudioPCMBuffer) {
    if playerNode.isPlaying {
        playerNode.scheduleBuffer(buffer, at: nil, options: .interrupts) {
            // Optional: Callback when buffer finishes playing
        }
    }
}
EDIT: I got this code running in real time (AVAudioSinkNode to AVAudioSourceNode); the only problem is that I need buffers of 1024 samples, and I can't enforce the buffer size (see the note after the code below).
import SwiftUI
import AVFoundation
import Combine

class CircularAudioFrameQueue {
    private var queue: [AVAudioPCMBuffer]
    private var headIndex = 0
    private var tailIndex = 0
    private let maxSize: Int
    private let format: AVAudioFormat

    init(format: AVAudioFormat, size: Int = 10) {
        self.maxSize = size
        self.format = format
        self.queue = (0..<size).compactMap { _ in AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 1024) }
    }

    // Enqueue a buffer
    func enqueue(_ buffer: AVAudioPCMBuffer) {
        queue[headIndex] = buffer
        headIndex = (headIndex + 1) % maxSize
        if headIndex == tailIndex {
            tailIndex = (tailIndex + 1) % maxSize // Overwrite oldest frame if buffer is full
        }
    }

    // Dequeue a buffer
    func dequeue() -> AVAudioPCMBuffer? {
        guard tailIndex != headIndex else { return nil } // Empty queue check
        let buffer = queue[tailIndex]
        tailIndex = (tailIndex + 1) % maxSize
        return buffer
    }
}
class AudioRecorderManager: NSObject, ObservableObject {
    private var audioEngine = AVAudioEngine()
    private var noiseCanceller: ONNXNoiseCanceller?
    private var frameQueue: CircularAudioFrameQueue?

    @Published var isRecording = false
    @Published var errorMessage: String?

    override init() {
        super.init()
        // Initialize noise cancellation model
        noiseCanceller = ONNXNoiseCanceller()
        setupAudioEngine()
    }

    private func setupAudioEngine() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
            try audioSession.setActive(true)
        } catch {
            errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
            print(errorMessage ?? "")
            return
        }

        // Get the input format
        let inputNode = audioEngine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)

        // Create a consistent format for processing
        guard let processingFormat = AVAudioFormat(
            standardFormatWithSampleRate: 44100,
            channels: 2
        ) else {
            errorMessage = "Failed to create audio format"
            print(errorMessage ?? "")
            return
        }

        frameQueue = CircularAudioFrameQueue(format: processingFormat)

        // Create a sink node for low-latency processing
        let sinkNode = AVAudioSinkNode { [weak self] (timestamp, frameCount, audioBufferList) -> OSStatus in
            guard let self = self else { return noErr }
            // Get buffer pointer for direct processing
            let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
            // Create a temporary buffer to hold input for noise cancellation
            let pcmBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, frameCapacity: frameCount)!
            pcmBuffer.frameLength = frameCount
            // Copy data from audioBufferList to pcmBuffer for noise cancellation processing
            for bufferIndex in 0..<min(ablPointer.count, Int(processingFormat.channelCount)) {
                let inBuffer = ablPointer[bufferIndex]
                let outBuffer = pcmBuffer.floatChannelData?[bufferIndex]
                if let inData = inBuffer.mData?.assumingMemoryBound(to: Float.self),
                   let outData = outBuffer {
                    // Process input data (e.g., noise cancellation)
                    for frame in 0..<Int(frameCount) {
                        outData[frame] = inData[frame] * 1 // Example processing (e.g., gain adjustment)
                    }
                }
            }
            // Store processed frames in the circular queue
            self.frameQueue?.enqueue(pcmBuffer)
            return noErr
        }

        // Create a source node for audio output
        let sourceNode = AVAudioSourceNode { [weak self] (silence, timeStamp, frameCount, audioBufferList) -> OSStatus in
            guard let self = self else { return noErr }
            // Pull frames from the circular queue
            if let buffer = self.frameQueue?.dequeue() {
                // Copy frames to the provided buffer
                let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
                for bufferIndex in 0..<min(ablPointer.count, Int(buffer.format.channelCount)) {
                    let outBuffer = ablPointer[bufferIndex]
                    if let outData = outBuffer.mData?.assumingMemoryBound(to: Float.self) {
                        // Copy processed frames into the output buffer
                        let pcmChannelData = buffer.floatChannelData?[bufferIndex]
                        if let pcmData = pcmChannelData {
                            for frame in 0..<Int(frameCount) {
                                outData[frame] = pcmData[frame]
                            }
                        }
                    }
                }
            }
            return noErr
        }

        // Attach nodes and set up connections
        audioEngine.attach(sinkNode)
        audioEngine.attach(sourceNode)
        audioEngine.connect(audioEngine.inputNode, to: sinkNode, format: inputFormat)
        audioEngine.connect(sourceNode, to: audioEngine.outputNode, format: inputFormat)
    }
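For reference: as far as I know, the only knob that influences the hardware render buffer size is AVAudioSession's preferred IO buffer duration, and it is only a request, not a guarantee, so the sink/source callbacks can still arrive with a different frame count. A rough sketch of what I mean (assuming a 48 kHz session):

import AVFoundation

// Ask the system for roughly 1024-frame hardware buffers at 48 kHz.
// This is only a hint; the actual buffer size is still chosen by the system.
let session = AVAudioSession.sharedInstance()
try? session.setPreferredIOBufferDuration(1024.0 / 48_000.0) // ~21.3 ms
print("Actual IO buffer duration: \(session.ioBufferDuration) s")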
I found a way to do what I was seeking:
Input audio goes to a sink node
The sink node writes into a circular buffer
A player node reads buffers from the circular buffer (a small usage sketch of the queue follows its class definition below)
CODE:
import SwiftUI
import AVFoundation
import Combine

class CircularAudioFrameQueue {
    private var buffers: [AVAudioPCMBuffer]
    private var bufferSampleOffsets: [Int] // Tracks how many samples we've consumed from each buffer
    private var headIndex = 0
    private var tailIndex = 0
    private var totalSamples = 0
    private let maxBuffers: Int
    private let format: AVAudioFormat

    init(format: AVAudioFormat, maxBuffers: Int = 100) {
        self.format = format
        self.maxBuffers = maxBuffers
        self.buffers = Array(repeating: AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 0)!, count: maxBuffers)
        self.bufferSampleOffsets = Array(repeating: 0, count: maxBuffers)
    }

    // Enqueue a buffer of any size
    func enqueue(_ buffer: AVAudioPCMBuffer) {
        // Create a copy of the buffer to store in the queue
        guard let copy = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: buffer.frameLength) else { return }
        copy.frameLength = buffer.frameLength
        // Copy the data
        for channel in 0..<Int(format.channelCount) {
            if let srcData = buffer.floatChannelData?[channel],
               let destData = copy.floatChannelData?[channel] {
                for frame in 0..<Int(buffer.frameLength) {
                    destData[frame] = srcData[frame]
                }
            }
        }
        // If the queue is full, remove the oldest buffer(s)
        while size >= maxBuffers {
            removeOldestBuffer()
        }
        // Add the new buffer
        buffers[headIndex] = copy
        bufferSampleOffsets[headIndex] = 0
        headIndex = (headIndex + 1) % maxBuffers
        totalSamples += Int(copy.frameLength)
    }
    // Dequeue exactly 'requestedSamples' number of samples across potentially multiple buffers
    func dequeue(requestedSamples: Int) -> AVAudioPCMBuffer? {
        guard totalSamples > 0, requestedSamples > 0 else { return nil }
        // Create a buffer to hold the requested samples
        let samplesToCopy = min(requestedSamples, totalSamples)
        guard let result = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(samplesToCopy)) else { return nil }
        result.frameLength = AVAudioFrameCount(samplesToCopy)
        var samplesCopied = 0
        while samplesCopied < samplesToCopy {
            // Check if we still have buffers to process
            if size == 0 {
                break
            }
            let currentBuffer = buffers[tailIndex]
            let currentOffset = bufferSampleOffsets[tailIndex]
            let remainingInBuffer = Int(currentBuffer.frameLength) - currentOffset
            let samplesNeeded = samplesToCopy - samplesCopied
            let samplesToCopyFromThisBuffer = min(remainingInBuffer, samplesNeeded)
            // Copy samples from the current buffer
            for channel in 0..<Int(format.channelCount) {
                if let srcData = currentBuffer.floatChannelData?[channel],
                   let destData = result.floatChannelData?[channel] {
                    for i in 0..<samplesToCopyFromThisBuffer {
                        destData[samplesCopied + i] = srcData[currentOffset + i]
                    }
                }
            }
            samplesCopied += samplesToCopyFromThisBuffer
            totalSamples -= samplesToCopyFromThisBuffer
            // Update the offset or remove the buffer if fully consumed
            if currentOffset + samplesToCopyFromThisBuffer >= Int(currentBuffer.frameLength) {
                // Buffer fully consumed, remove it
                tailIndex = (tailIndex + 1) % maxBuffers
            } else {
                // Update offset
                bufferSampleOffsets[tailIndex] += samplesToCopyFromThisBuffer
            }
        }
        return result
    }
    // Has enough samples (1024 or more)
    var hasEnoughSamples: Bool {
        return totalSamples >= 1024
    }

    // Get the oldest 1024 samples
    func getOldest1024Samples() -> AVAudioPCMBuffer? {
        return dequeue(requestedSamples: 1024)
    }

    // Get the oldest 2048 samples
    func getOldest2048Samples() -> AVAudioPCMBuffer? {
        return dequeue(requestedSamples: 2048)
    }

    // Get available samples count
    var availableSamples: Int {
        return totalSamples
    }

    // Helper method to remove the oldest buffer
    private func removeOldestBuffer() {
        guard size > 0 else { return }
        let removedBuffer = buffers[tailIndex]
        let remainingSamples = Int(removedBuffer.frameLength) - bufferSampleOffsets[tailIndex]
        totalSamples -= remainingSamples
        tailIndex = (tailIndex + 1) % maxBuffers
    }

    // The size (number of buffers) in the queue
    private var size: Int {
        let diff = headIndex - tailIndex
        return diff >= 0 ? diff : diff + maxBuffers
    }
}
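To make the repackaging behaviour concrete, here is a small standalone sketch of the queue on its own: chunks of whatever size the hardware delivers go in, and fixed 1024-frame buffers come out (the 480-frame chunk size below is just an arbitrary stand-in for what a callback might deliver):

import AVFoundation

// Illustrative only: repackage arbitrary-sized input chunks into fixed 1024-frame buffers.
let format = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 2)!
let queue = CircularAudioFrameQueue(format: format, maxBuffers: 100)

// Simulate three input callbacks of 480 frames each (1440 frames total).
for _ in 0..<3 {
    let chunk = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: 480)!
    chunk.frameLength = 480
    queue.enqueue(chunk)
}

// Only pull once at least 1024 frames have accumulated.
if queue.hasEnoughSamples, let fixed = queue.dequeue(requestedSamples: 1024) {
    print(fixed.frameLength)      // 1024
    print(queue.availableSamples) // 416 frames left over for the next chunk
}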
class AudioRecorderManager: NSObject, ObservableObject {
    private var audioEngine = AVAudioEngine()
    private var noiseCanceller: ONNXNoiseCanceller?
    private var frameQueue: CircularAudioFrameQueue?
    private var playerNode = AVAudioPlayerNode()
    private var playbackTimer: Timer?

    @Published var isRecording = false
    @Published var isPlaying = false
    @Published var errorMessage: String?

    override init() {
        super.init()
        // Initialize noise cancellation model
        noiseCanceller = ONNXNoiseCanceller()
        setupAudioEngine()
    }

    private func setupAudioEngine() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
            try audioSession.setActive(true)
        } catch {
            errorMessage = "Failed to set up audio session: \(error.localizedDescription)"
            print(errorMessage ?? "")
            return
        }

        // Get the input format
        let inputNode = audioEngine.inputNode
        let inputFormat = inputNode.outputFormat(forBus: 0)

        // Create a consistent format for processing
        guard let processingFormat = AVAudioFormat(
            standardFormatWithSampleRate: 48000,
            channels: 2
        ) else {
            errorMessage = "Failed to create audio format"
            print(errorMessage ?? "")
            return
        }

        // Initialize frame queue with room for up to 100 buffered chunks
        frameQueue = CircularAudioFrameQueue(format: processingFormat, maxBuffers: 100)

        // Create a sink node for low-latency processing
        let sinkNode = AVAudioSinkNode { [weak self] (timestamp, frameCount, audioBufferList) -> OSStatus in
            guard let self = self else { return noErr }
            // Get buffer pointer for direct processing
            let ablPointer = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: audioBufferList))
            // Create a temporary buffer with 2 channels
            let outputFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: processingFormat.sampleRate, channels: 2, interleaved: false)!
            let pcmBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: frameCount)!
            pcmBuffer.frameLength = frameCount
            // Copy data from input to both output channels
            if let inBuffer = ablPointer.first,
               let inData = inBuffer.mData?.assumingMemoryBound(to: Float.self),
               let leftOutBuffer = pcmBuffer.floatChannelData?[0],
               let rightOutBuffer = pcmBuffer.floatChannelData?[1] {
                for frame in 0..<Int(frameCount) {
                    let sample = inData[frame]
                    leftOutBuffer[frame] = sample
                    rightOutBuffer[frame] = sample
                }
            }
            // Store processed frames in the circular queue
            self.frameQueue?.enqueue(pcmBuffer)
            return noErr
        }

        // Set up the player node
        audioEngine.attach(playerNode)
        audioEngine.attach(sinkNode)
        // Connect the input to the sink node
        audioEngine.connect(audioEngine.inputNode, to: sinkNode, format: inputFormat)
        // Connect player node to output
        audioEngine.connect(playerNode, to: audioEngine.outputNode, format: processingFormat)
    }
    func toggleRecording() {
        guard AVAudioSession.sharedInstance().recordPermission == .granted else {
            requestMicrophonePermission()
            return
        }
        if isRecording {
            stopRecording()
        } else {
            startRecording()
        }
    }

    func togglePlayback() {
        if isPlaying {
            stopPlayback()
        } else {
            startPlayback()
        }
    }

    private func startRecording() {
        do {
            // If engine isn't running, start it
            if !audioEngine.isRunning {
                try audioEngine.start()
            }
            isRecording = true
            errorMessage = nil
        } catch {
            errorMessage = "Error starting audio engine: \(error.localizedDescription)"
            isRecording = false
            print(errorMessage ?? "")
        }
    }

    private func stopRecording() {
        if audioEngine.isRunning && !isPlaying {
            audioEngine.stop()
        }
        isRecording = false
    }

    private func startPlayback() {
        do {
            // Make sure engine is running
            if !audioEngine.isRunning {
                try audioEngine.start()
            }
            playerNode.play()
            isPlaying = true
            // Start a timer to check for available buffers to play
            playbackTimer = Timer.scheduledTimer(withTimeInterval: 0.001, repeats: true) { [weak self] _ in
                self?.scheduleNextBuffer()
            }
        } catch {
            errorMessage = "Error starting playback: \(error.localizedDescription)"
            print(errorMessage ?? "")
        }
    }

    private func stopPlayback() {
        playerNode.stop()
        playbackTimer?.invalidate()
        playbackTimer = nil
        isPlaying = false
        if !isRecording && audioEngine.isRunning {
            audioEngine.stop()
        }
    }
    private func scheduleNextBuffer() {
        // Only schedule if we have at least 1024 samples available
        guard let frameQueue = frameQueue, frameQueue.hasEnoughSamples else {
            return
        }
        // Check if player node needs more buffers
        if !playerNode.isPlaying {
            playerNode.play()
        }
        // Get the oldest 1024 samples from the queue
        if let buffer = frameQueue.getOldest1024Samples() {
            let len = buffer.frameLength
            // Process audio with noise cancellation
            let processedBuffer = noiseCanceller!.processAudioChunk(buffer)
            // Schedule the processed buffer for playback
            let finalBuffer = noiseCanceller!.processOutputTensor(processedBuffer!, audioEngine: self.audioEngine, noiseProfile: buffer)
            playerNode.scheduleBuffer(finalBuffer!, at: nil, options: .interruptsAtLoop)
            // Immediately try to schedule the next chunk if more samples are queued
            DispatchQueue.main.async {
                self.scheduleNextBuffer()
            }
        }
    }

    private func requestMicrophonePermission() {
        AVAudioSession.sharedInstance().requestRecordPermission { [weak self] granted in
            DispatchQueue.main.async {
                if granted {
                    self?.toggleRecording()
                } else {
                    self?.errorMessage = "Microphone permission denied"
                }
            }
        }
    }

    deinit {
        playbackTimer?.invalidate()
        if audioEngine.isRunning {
            audioEngine.stop()
        }
    }
}
struct AudioRecorderView: View {
    @StateObject private var audioManager = AudioRecorderManager()

    var body: some View {
        VStack {
            if let errorMessage = audioManager.errorMessage {
                Text(errorMessage)
                    .foregroundColor(.red)
                    .padding()
            }

            Text(audioManager.isRecording ? "Recording" : "Paused")
                .foregroundColor(audioManager.isRecording ? .red : .gray)
                .padding()

            Button(action: {
                audioManager.toggleRecording()
            }) {
                Text(audioManager.isRecording ? "Stop Recording" : "Start Recording")
                    .foregroundColor(.white)
                    .padding()
                    .background(audioManager.isRecording ? Color.red : Color.blue)
                    .cornerRadius(10)
            }
            .padding()

            Button(action: {
                audioManager.togglePlayback()
            }) {
                Text(audioManager.isPlaying ? "Stop Playback" : "Start Playback")
                    .foregroundColor(.white)
                    .padding()
                    .background(audioManager.isPlaying ? Color.orange : Color.green)
                    .cornerRadius(10)
            }
        }
    }
}