I have a macOS Swift app that captures audio from the user's microphone. I'm attempting to use the new (macOS 14+) APIs on AVAudioApplication that let the user mute the audio input with a gesture (pressing the stem of their AirPods). According to WWDC, there are two "levels" of doing this: getting a notification and handling it at the app level, or using a lower-level CoreAudio API. I'm trying to do the former in this case.
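For context, the lower-level route (which I'm not using here) sets a process-wide input-mute property through CoreAudio. A rough sketch, assuming the kAudioHardwarePropertyProcessInputMute property that shipped alongside these APIs; the scope constant is my best guess:

import CoreAudio

// Sketch only: mutes this process's audio input at the CoreAudio level.
// Assumes macOS 14+ and kAudioHardwarePropertyProcessInputMute.
func setProcessInputMuted(_ muted: Bool) {
    var address = AudioObjectPropertyAddress(
        mSelector: kAudioHardwarePropertyProcessInputMute,
        mScope: kAudioObjectPropertyScopeInput,
        mElement: kAudioObjectPropertyElementMain
    )
    var value: UInt32 = muted ? 1 : 0
    let status = AudioObjectSetPropertyData(
        AudioObjectID(kAudioObjectSystemObject),
        &address,
        0,
        nil,
        UInt32(MemoryLayout<UInt32>.size),
        &value
    )
    if status != noErr {
        print("Failed to set process input mute:", status)
    }
}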
Here's my example code (the relevant part is just Manager -- the rest is boilerplate to capture microphone input via CoreAudio, so that this works as a minimal reproducible example).
import SwiftUI
import Combine
import AVFoundation
import CoreAudio
import AudioToolbox

class Manager: ObservableObject {
    private var controller: AudioInputController?
    private var cancellable: AnyCancellable?

    init() {
        // App-level observation of input-mute changes (e.g. an AirPods stem press).
        cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
            .sink { notification in
                print("Notification", notification)
            }
        do {
            // Returning true tells the system the mute state change was handled.
            try AVAudioApplication.shared.setInputMuteStateChangeHandler { isMuted in
                print("Mute state", isMuted, Date())
                return true
            }
        } catch {
            assertionFailure()
            print("Error setting up handler", error)
        }
        controller = AudioInputController()
        controller?.start()
    }
}
struct ContentView: View {
    // Instantiated here so the Manager (and the audio unit) stays alive with the view.
    @StateObject private var manager = Manager()

    var body: some View {
        VStack {
            Image(systemName: "globe")
                .imageScale(.large)
                .foregroundStyle(.tint)
        }
        .padding()
    }
}
func getDefaultAudioDeviceID() -> AudioDeviceID? {
    var deviceID = AudioDeviceID()
    var dataSize = UInt32(MemoryLayout<AudioDeviceID>.size)
    // kAudioHardwarePropertyDefaultInputDevice is a property of the system
    // object and is queried with the global scope.
    var propertyAddress = AudioObjectPropertyAddress(
        mSelector: kAudioHardwarePropertyDefaultInputDevice,
        mScope: kAudioObjectPropertyScopeGlobal,
        mElement: kAudioObjectPropertyElementMain
    )
    let status = AudioObjectGetPropertyData(
        AudioObjectID(kAudioObjectSystemObject),
        &propertyAddress,
        0,
        nil,
        &dataSize,
        &deviceID
    )
    guard status == noErr else {
        assertionFailure()
        return nil
    }
    return deviceID
}
private final class AudioInputController {
    private var auHAL: AudioComponentInstance?
    private var inputBufferList: UnsafeMutableAudioBufferListPointer?
    private var sampleRate: Float = 0.0

    init?() {
        guard let audioDeviceID = getDefaultAudioDeviceID() else {
            assertionFailure()
            return nil
        }
        var osStatus: OSStatus = noErr
        // Create an AUHAL instance.
        var description = AudioComponentDescription(
            componentType: kAudioUnitType_Output,
            componentSubType: kAudioUnitSubType_HALOutput,
            componentManufacturer: kAudioUnitManufacturer_Apple,
            componentFlags: 0,
            componentFlagsMask: 0
        )
        guard let component = AudioComponentFindNext(nil, &description) else {
            assertionFailure()
            return nil
        }
        osStatus = AudioComponentInstanceNew(component, &auHAL)
        guard osStatus == noErr, let auHAL else {
            return nil
        }
        // Enable the input bus (element 1) and disable the output bus (element 0).
        let kInputElement: UInt32 = 1
        let kOutputElement: UInt32 = 0
        var kInputData: UInt32 = 1
        var kOutputData: UInt32 = 0
        let ioDataSize = UInt32(MemoryLayout<UInt32>.size)
        osStatus = AudioUnitSetProperty(
            auHAL,
            kAudioOutputUnitProperty_EnableIO,
            kAudioUnitScope_Input,
            kInputElement,
            &kInputData,
            ioDataSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        osStatus = AudioUnitSetProperty(
            auHAL,
            kAudioOutputUnitProperty_EnableIO,
            kAudioUnitScope_Output,
            kOutputElement,
            &kOutputData,
            ioDataSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Bind the AUHAL to the default input device.
        var inputDevice: AudioDeviceID = audioDeviceID
        let inputDeviceSize = UInt32(MemoryLayout<AudioDeviceID>.size)
        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioOutputUnitProperty_CurrentDevice),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &inputDevice,
            inputDeviceSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Adopt the stream format.
        var deviceFormat = AudioStreamBasicDescription()
        var desiredFormat = AudioStreamBasicDescription()
        var ioFormatSize: UInt32 = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
        // Input scope of the input element: the format the device delivers.
        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Input),
            kInputElement,
            &deviceFormat,
            &ioFormatSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Output scope of the input element: the format the AUHAL delivers to us.
        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Output),
            kInputElement,
            &desiredFormat,
            &ioFormatSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Same sample rate, same number of channels.
        desiredFormat.mSampleRate = deviceFormat.mSampleRate
        desiredFormat.mChannelsPerFrame = deviceFormat.mChannelsPerFrame
        // Canonical audio format: native-endian, packed, non-interleaved Float32.
        desiredFormat.mFormatID = kAudioFormatLinearPCM
        desiredFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved
        desiredFormat.mFramesPerPacket = 1
        desiredFormat.mBytesPerFrame = UInt32(MemoryLayout<Float32>.size)
        desiredFormat.mBytesPerPacket = UInt32(MemoryLayout<Float32>.size)
        desiredFormat.mBitsPerChannel = 8 * UInt32(MemoryLayout<Float32>.size)
        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Output),
            kInputElement,
            &desiredFormat,
            UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Store the format information.
        sampleRate = Float(desiredFormat.mSampleRate)
        // Get the buffer frame size.
        var bufferSizeFrames: UInt32 = 0
        var bufferSizeFramesSize = UInt32(MemoryLayout<UInt32>.size)
        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioDevicePropertyBufferFrameSize),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &bufferSizeFrames,
            &bufferSizeFramesSize
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        // Allocate one buffer per channel: the format is non-interleaved,
        // so each AudioBuffer holds a single channel of Float32 samples.
        let bufferSizeBytes: UInt32 = bufferSizeFrames * UInt32(MemoryLayout<Float32>.size)
        let channels: UInt32 = deviceFormat.mChannelsPerFrame
        inputBufferList = AudioBufferList.allocate(maximumBuffers: Int(channels))
        for i in 0 ..< Int(channels) {
            inputBufferList?[i] = AudioBuffer(
                mNumberChannels: 1,
                mDataByteSize: bufferSizeBytes,
                mData: malloc(Int(bufferSizeBytes))
            )
        }
        var callbackStruct = AURenderCallbackStruct(
            inputProc: { (
                inRefCon: UnsafeMutableRawPointer,
                ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
                inTimeStamp: UnsafePointer<AudioTimeStamp>,
                inBusNumber: UInt32,
                inNumberFrame: UInt32,
                _: UnsafeMutablePointer<AudioBufferList>?
            ) -> OSStatus in
                let owner = Unmanaged<AudioInputController>.fromOpaque(inRefCon).takeUnretainedValue()
                owner.inputCallback(
                    ioActionFlags: ioActionFlags,
                    inTimeStamp: inTimeStamp,
                    inBusNumber: inBusNumber,
                    inNumberFrame: inNumberFrame
                )
                return noErr
            },
            inputProcRefCon: Unmanaged.passUnretained(self).toOpaque()
        )
        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &callbackStruct,
            UInt32(MemoryLayout<AURenderCallbackStruct>.size)
        )
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
        osStatus = AudioUnitInitialize(auHAL)
        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
    }
    deinit {
        if let auHAL {
            AudioOutputUnitStop(auHAL)
            AudioUnitUninitialize(auHAL)
            AudioComponentInstanceDispose(auHAL)
        }
        if let inputBufferList {
            for buffer in inputBufferList {
                free(buffer.mData)
            }
            free(inputBufferList.unsafeMutablePointer)
        }
    }
    private func inputCallback(
        ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
        inTimeStamp: UnsafePointer<AudioTimeStamp>,
        inBusNumber: UInt32,
        inNumberFrame: UInt32
    ) {
        guard let inputBufferList,
              let auHAL
        else {
            assertionFailure()
            return
        }
        let err = AudioUnitRender(
            auHAL,
            ioActionFlags,
            inTimeStamp,
            inBusNumber,
            inNumberFrame,
            inputBufferList.unsafeMutablePointer
        )
        guard err == noErr else {
            assertionFailure()
            return
        }
    }
    func start() {
        guard let auHAL else {
            assertionFailure()
            return
        }
        let status: OSStatus = AudioOutputUnitStart(auHAL)
        if status != noErr {
            assertionFailure()
        }
    }

    func stop() {
        guard let auHAL else {
            assertionFailure()
            return
        }
        let status: OSStatus = AudioOutputUnitStop(auHAL)
        if status != noErr {
            assertionFailure()
        }
    }
}
Note: if you attempt to run this, make sure to add Audio Input to the app's Capabilities and the NSMicrophoneUsageDescription key to its Info.plist.
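If you want to rule out a permissions problem first, a quick check like this prints the current authorization status (a sketch; checkMicrophonePermission is just an illustrative helper, using AVCaptureDevice, which works on macOS too):

import AVFoundation

// Prints the current microphone authorization status and requests access if undetermined.
func checkMicrophonePermission() {
    switch AVCaptureDevice.authorizationStatus(for: .audio) {
    case .authorized:
        print("Microphone access granted")
    case .notDetermined:
        AVCaptureDevice.requestAccess(for: .audio) { granted in
            print("Microphone access granted:", granted)
        }
    default:
        print("Microphone access denied or restricted")
    }
}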
When I press the stem on my AirPods Pro (2nd generation), nothing is printed: neither the notification nor the handler fires.
How can I make sure that either AVAudioApplication.inputMuteStateChangeNotification or AVAudioApplication.shared.setInputMuteStateChangeHandler actually gets called when the stem is pressed?
It looks like the order of registration is important here: inputMuteStateChangeNotification must be subscribed to after setInputMuteStateChangeHandler has been set, which seems strange, because I would have assumed that notification registration was effectively order-independent. The following change to Manager works:
class Manager: ObservableObject {
    private var controller: AudioInputController?
    private var cancellable: AnyCancellable?

    init() {
        // Install the mute-state handler first...
        do {
            try AVAudioApplication.shared.setInputMuteStateChangeHandler { isMuted in
                print("Mute state", isMuted, Date())
                return true
            }
        } catch {
            assertionFailure()
            print("Error setting up handler", error)
        }
        // ...and only then subscribe to the notification.
        cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
            .sink { notification in
                print("Notification", notification)
            }
        controller = AudioInputController()
        controller?.start()
    }
}
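If you also want the new state inside the notification sink, the notification's userInfo should carry it; a sketch, assuming the documented AVAudioApplication.muteStateKey:

cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
    .sink { notification in
        // The new mute state is delivered in userInfo under muteStateKey.
        if let isMuted = notification.userInfo?[AVAudioApplication.muteStateKey] as? Bool {
            print("Notification mute state:", isMuted)
        }
    }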
Thanks to Kuvonchbek Yakubov's answer for inspiring the idea that where the registration happens is important.