I'm writing logic to encode an audio stream to Opus (to send over a WebSocket); I keep getting an "Invalid frame size: 4800. Must be one of [120, 240, 480, 960, 1920, 2880]" error:
//
// AudioManager.swift
//
//
import Foundation
import AVFoundation
import Opus
class AudioManager: ObservableObject {
private var audioEngine: AVAudioEngine?
private var audioInputNode: AVAudioInputNode?
private weak var appManager: AppManager?
private var canSendData: Bool = false // Flag to control data sending
private var audioBufferQueue: AVAudioPCMBuffer?
private var opusEncoder: Opus.Encoder?
init(appManager: AppManager) {
self.appManager = appManager
setupOpusEncoder()
setupAudioSession()
setupAudioEngine()
}
private func setupAudioSession() {
let session = AVAudioSession.sharedInstance()
do {
try session.setPreferredSampleRate(48000)
try session.setPreferredInputNumberOfChannels(1)
try session.setCategory(.playAndRecord, mode: .default, options: [.defaultToSpeaker, .allowBluetooth])
try session.setActive(true)
print("Audio session setup complete with sample rate 48000 Hz and mono channel.")
} catch {
print("Failed to set up audio session: \(error)")
}
}
func checkMicrophonePermission() -> Bool {
return AVAudioSession.sharedInstance().recordPermission == .granted
}
func requestMicrophoneAccess(completion: @escaping (Bool) -> Void) {
let audioSession = AVAudioSession.sharedInstance()
switch audioSession.recordPermission {
case .granted:
completion(true)
case .denied:
completion(false)
case .undetermined:
audioSession.requestRecordPermission { granted in
DispatchQueue.main.async {
completion(granted)
}
}
@unknown default:
completion(false)
}
}
private func setupOpusEncoder() {
// Define the parameters for the audio format
let sampleRate = 48000.0 // Sample rate in Hz
let channels = 1 // Number of audio channels
// Attempt to create an AVAudioFormat instance
guard let opusFormat = AVAudioFormat(opusPCMFormat: .float32, sampleRate: sampleRate, channels: AVAudioChannelCount(channels)) else {
print("Invalid audio format parameters")
return
}
do {
// Create the Opus encoder with the valid audio format
opusEncoder = try Opus.Encoder(format: opusFormat)
print("Opus encoder successfully created")
} catch {
// Handle any errors that might occur during the encoder initialization
print("Failed to create Opus encoder: \(error)")
}
}
func setupAudioEngine() {
audioEngine = AVAudioEngine()
guard let audioEngine = audioEngine else {
print("Audio engine could not be initialized")
return
}
let inputNode = audioEngine.inputNode
let mixerNode = AVAudioMixerNode()
audioEngine.attach(mixerNode)
// Choose an Opus-compatible buffer size
let opusCompatibleBufferSize: AVAudioFrameCount = 960 // Choose based on your latency and quality requirements
let desiredFormat = AVAudioFormat(standardFormatWithSampleRate: 48000, channels: 1)
audioEngine.connect(inputNode, to: mixerNode, format: inputNode.inputFormat(forBus: 0))
audioEngine.connect(mixerNode, to: audioEngine.mainMixerNode, format: desiredFormat)
mixerNode.installTap(onBus: 0, bufferSize: opusCompatibleBufferSize, format: desiredFormat) { [weak self] (buffer, when) in
self?.bufferAudioData(buffer)
}
do {
try audioEngine.start()
print("Audio engine started with desired format.")
} catch {
print("Failed to start audio engine: \(error)")
}
}
private func bufferAudioData(_ buffer: AVAudioPCMBuffer) {
guard let encoder = opusEncoder else {
print("Opus encoder not initialized")
return
}
// Validate buffer format again before attempting to encode
if buffer.format.sampleRate != 48000 || buffer.format.channelCount != 1 {
print("Buffer format mismatch: Expected 48000 Hz, 1 channel, but got \(buffer.format.sampleRate) Hz, \(buffer.format.channelCount) channels")
return
}
// Ensure the buffer frame size is a valid Opus frame size
let validFrameSizes = [120, 240, 480, 960, 1920, 2880] // Frame sizes for 48000 Hz
guard validFrameSizes.contains(Int(buffer.frameLength)) else {
print("Invalid frame size: \(buffer.frameLength). Must be one of \(validFrameSizes)")
return
}
var opusData = Data() // Initialize an empty Data object to hold the encoded data.
do {
// Attempt to encode and capture the number of bytes encoded
let bytesEncoded = try encoder.encode(buffer, to: &opusData)
print("Encoded \(bytesEncoded) bytes of data.")
if !opusData.isEmpty && canSendData {
appManager?.webSocketManager.send(data: opusData) {
print("Opus encoded audio data sent.")
}
}
} catch let error as Opus.Error {
// Print the Opus error with its raw value and a possible interpretation
print("Failed to encode audio: Opus Error \(error.rawValue) - \(interpretOpusError(error))")
} catch {
// This catches non-Opus errors
print("Failed to encode audio: \(error)")
}
}
/// Interprets Opus error codes into human-readable descriptions
private func interpretOpusError(_ error: Opus.Error) -> String {
switch error {
case .ok:
return "No error."
case .badArgument:
return "One or more invalid/out of range arguments."
case .bufferTooSmall:
return "Not enough bytes allocated in the buffer."
case .internalError:
return "An internal error was detected."
case .invalidPacket:
return "The compressed data passed is corrupted."
case .unimplemented:
return "Invalid/unsupported request number."
case .invalidState:
return "An encoder or decoder structure is invalid or already freed."
case .allocationFailure:
return "Memory allocation has failed."
default:
return "Unknown error."
}
}
func startRecording() {
print("Starting recording...")
canSendData = true
appManager?.webSocketManager.send(string: "{\"command\": \"aq_start\"}") {
print("Sent start recording command.")
}
}
func stopRecording() {
print("Stopping recording...")
canSendData = false
appManager?.webSocketManager.send(string: "{\"command\": \"aq_stop\"}") {
print("Sent stop recording command.")
}
audioEngine?.stop()
print("Recording stopped.")
}
}
Your AVAudioNode tap bufferSize is being ignored and you're getting 100 ms chunks of audio (your 4800 frames at 48 kHz is 100 ms), while the Opus encoder wants smaller chunks. You need to break up the overly large audio buffers into Opus-sized chunks.
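For example, here's a minimal sketch of that, assuming the mono float32 format set up above and reusing the opusEncoder, canSendData and appManager members from your class. The helper name encodeInOpusChunks is made up, and 960 frames (20 ms at 48 kHz) is just one of the valid sizes:

private func encodeInOpusChunks(_ buffer: AVAudioPCMBuffer) {
    guard let encoder = opusEncoder,
          let source = buffer.floatChannelData else { return }
    let frameSize: AVAudioFrameCount = 960 // 20 ms at 48 kHz
    let channelCount = Int(buffer.format.channelCount)
    var offset: AVAudioFrameCount = 0
    // Walk the oversized tap buffer in 960-frame steps.
    while offset + frameSize <= buffer.frameLength {
        guard let chunk = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: frameSize) else { break }
        chunk.frameLength = frameSize
        // Copy one frame-sized slice of each (deinterleaved) channel.
        if let dest = chunk.floatChannelData {
            for channel in 0..<channelCount {
                dest[channel].update(from: source[channel] + Int(offset), count: Int(frameSize))
            }
        }
        var opusData = Data()
        do {
            _ = try encoder.encode(chunk, to: &opusData)
            if !opusData.isEmpty && canSendData {
                appManager?.webSocketManager.send(data: opusData) {
                    print("Opus encoded audio chunk sent.")
                }
            }
        } catch {
            print("Failed to encode chunk: \(error)")
        }
        offset += frameSize
    }
    // Note: any trailing remainder (frameLength % 960) is silently dropped here;
    // see the accumulator sketch further down for carrying it over.
}

You'd call this from the tap block in place of bufferAudioData, and you could then drop the frame-size validation since every chunk is exactly 960 frames.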
I have never seen an AVAudioNode tap honour its bufferSize parameter, but a header file comment says "Supported range is [100, 400] ms", so it's probable that I too have simply been choosing buffer sizes that were too small. But then the online documentation says "The implementation may choose another size.", which may explain both why you didn't notice and my growing disenchantment with bufferSize during these last 10 years of AVAudioEngine.
At 48 kHz, the tap allegedly supports a bufferSize range of [4800, 19200] samples. If you were feeling lazy or just wanted a quick result, 19200 samples, which is a multiple of the 1920-sample Opus frame size, would mean you get no pesky remainder buffers. However, as the doco says, the API is free to ignore bufferSize, so for code that other people might see you really should do it right and handle remainders, as in the sketch below.
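A sketch of that under the same assumptions as above: buffer incoming samples in a FIFO and only hand the encoder exact 960-frame chunks, carrying any remainder over to the next tap callback. The pendingSamples array is a hypothetical new property on AudioManager, not something from the original code:

// Hypothetical new property on AudioManager:
private var pendingSamples: [Float] = []

private func bufferAudioData(_ buffer: AVAudioPCMBuffer) {
    guard let encoder = opusEncoder,
          let channelData = buffer.floatChannelData else { return }
    // Append the new mono samples (channel 0) to the FIFO.
    pendingSamples.append(contentsOf: UnsafeBufferPointer(start: channelData[0],
                                                          count: Int(buffer.frameLength)))
    let frameSize = 960 // 20 ms at 48 kHz
    // Emit as many exact Opus-sized chunks as we have queued up.
    while pendingSamples.count >= frameSize {
        guard let chunk = AVAudioPCMBuffer(pcmFormat: buffer.format,
                                           frameCapacity: AVAudioFrameCount(frameSize)) else { break }
        chunk.frameLength = AVAudioFrameCount(frameSize)
        pendingSamples.withUnsafeBufferPointer { src in
            chunk.floatChannelData![0].update(from: src.baseAddress!, count: frameSize)
        }
        pendingSamples.removeFirst(frameSize)
        var opusData = Data()
        do {
            _ = try encoder.encode(chunk, to: &opusData)
            if !opusData.isEmpty && canSendData {
                appManager?.webSocketManager.send(data: opusData) {
                    print("Opus encoded audio data sent.")
                }
            }
        } catch {
            print("Failed to encode audio: \(error)")
        }
    }
    // Anything still in pendingSamples (< 960 frames) waits for the next tap.
}

removeFirst(_:) is O(n), which is fine at these buffer sizes; swap in a ring buffer if the shuffling ever shows up in a profile.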