I'm receiving audio chunks over WebSocket and storing them in an AVAudioPCMBuffer for playback with AVAudioEngine. However, when I start playing the audio, I hear noise instead of the expected sound. I suspect the issue is in my decodeAudioData function, but I'm not sure what exactly is going wrong.

Here is the full code:
import SwiftUI
import AVFoundation
import Combine

class WebSocketManager: ObservableObject {
    private var webSocketTask: URLSessionWebSocketTask?
    private let url = URL(string: "ws://localhost:6000/stream")!
    private let audioEngine = AVAudioEngine()
    private let player = AVAudioPlayerNode()
    private var hasStartedPlaying = false
    private let inputFormat: AVAudioFormat!
    private let outputFormat: AVAudioFormat
    private var buffersInQueue = 0

    init() {
        inputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 44100, channels: AVAudioChannelCount(2), interleaved: true)!
        outputFormat = audioEngine.mainMixerNode.outputFormat(forBus: 0)
        if !audioEngine.attachedNodes.contains(player) {
            audioEngine.attach(player)
        }
        audioEngine.connect(player, to: audioEngine.mainMixerNode, format: nil)
        audioEngine.prepare()
    }
    func connectAndPlay() {
        webSocketTask = URLSession.shared.webSocketTask(with: url)
        webSocketTask?.resume()
        if !audioEngine.isRunning {
            do {
                try audioEngine.start()
                print("🔊 Audio Engine Started")
            } catch {
                print("⚠️ Failed to start audio engine: \(error)")
                return
            }
        }
        receiveMessage()
    }

    private func closeConnection() {
        webSocketTask?.cancel()
        webSocketTask = nil
        print("WebSocket closed")
    }
    private func receiveMessage() {
        webSocketTask?.receive { [weak self] result in
            guard let self = self else { return }
            switch result {
            case .success(.string(let message)):
                print("Message", message.count)
                DispatchQueue.main.async {
                    if message == "END" {
                        self.closeConnection()
                        return
                    } else {
                        self.addBufferToQueue(chunk: message)
                        if !self.hasStartedPlaying {
                            print("Playing")
                            self.playAudio()
                            self.hasStartedPlaying = true
                        }
                    }
                }
                self.receiveMessage()
            case .failure(let error):
                print("WebSocket Error: \(error)")
            default:
                print("Unknown")
            }
        }
    }
    func addBufferToQueue(chunk: String) {
        guard let audioBuffer = self.decodeAudioData(chunk) else {
            NSLog("Failed to decode audio buffer")
            return
        }
        self.addToBuffer(buffer: audioBuffer)
    }

    private func addToBuffer(buffer: AVAudioPCMBuffer) {
        guard buffer.format.isEqual(outputFormat) else {
            NSLog("Format mismatch: \(buffer.format) vs \(String(describing: outputFormat))")
            return
        }
        self.buffersInQueue += 1
        player.scheduleBuffer(buffer) {
            DispatchQueue.main.async { [weak self] in
                guard let self else { return }
                self.buffersInQueue -= 1
                if self.buffersInQueue == 0 {
                    // self.onBufferFinished?()
                    print("Finished")
                }
            }
        }
    }
    private func decodeAudioData(_ base64String: String) -> AVAudioPCMBuffer? {
        guard let data = Data(base64Encoded: base64String) else {
            NSLog("Error decoding base64 data")
            return nil
        }
        guard let inputFormat = inputFormat else {
            NSLog("Error: Audio format is nil")
            return nil
        }
        let frameCount = UInt32(data.count) / inputFormat.streamDescription.pointee.mBytesPerFrame
        guard let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCount) else {
            NSLog("Error creating AVAudioPCMBuffer")
            return nil
        }
        print(frameCount, inputFormat.channelCount)
        inputBuffer.frameLength = frameCount
        data.withUnsafeBytes { (bufferPointer: UnsafeRawBufferPointer) in
            if let memory = bufferPointer.baseAddress?.assumingMemoryBound(to: Int16.self) {
                inputBuffer.int16ChannelData?.pointee.update(from: memory, count: Int(frameCount) * Int(inputFormat.channelCount))
            }
        }
        let sampleCount = Int(frameCount) * Int(inputFormat.channelCount)
        print("sampleCount:", sampleCount)
        guard let converter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
            NSLog("Error creating audio converter")
            return nil
        }
        let converterFrameCapacity = AVAudioFrameCount(outputFormat.sampleRate / inputFormat.sampleRate * Double(inputBuffer.frameCapacity))
        guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: converterFrameCapacity) else {
            NSLog("Error creating converted buffer")
            return nil
        }
        convertedBuffer.frameLength = convertedBuffer.frameCapacity
        var error: NSError?
        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
            outStatus.pointee = .haveData
            return inputBuffer
        }
        converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputBlock)
        if let error = error {
            NSLog("Error during conversion: \(error)")
            return nil
        }
        print("Converted Buffer Frame Count: \(convertedBuffer.frameLength)")
        return convertedBuffer
    }
    func stopAudio() {
        player.stop()
        audioEngine.stop()
        closeConnection()
        hasStartedPlaying = false
    }

    private func playAudio() {
        if !audioEngine.isRunning {
            do {
                try audioEngine.start()
            } catch {
                print("⚠️ Failed to start audio engine: \(error)")
                return
            }
        }
        player.play()
    }
}
struct ContentView: View {
    @StateObject private var webSocketManager = WebSocketManager()

    var body: some View {
        VStack {
            Image(systemName: "globe")
                .imageScale(.large)
                .foregroundStyle(.tint)
            Text("Streaming Audio")
            Button("Play Audio") {
                webSocketManager.connectAndPlay()
            }
            Button("Stop Audio") {
                webSocketManager.stopAudio()
            }
        }
        .padding()
    }
}

#Preview {
    ContentView()
}
Your problem is that you are creating LPCM AVAudioPCMBuffers out of what is actually MP3 data (the format specified in the comments). Interpreting MP3 data as LPCM will give you a cacophony. You should instead create AVAudioCompressedBuffers.
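For illustration, here is a minimal sketch of what creating one of those buffers from a base64 chunk might look like. It assumes 44.1 kHz stereo MP3 and, to keep it short, treats each WebSocket message as exactly one MP3 packet; a real stream may need to parse frame boundaries and fill in packet descriptions properly. The helper name is made up.

import AVFoundation
import AudioToolbox

// Sketch only: assumes each chunk decodes to exactly one MP3 packet.
func makeCompressedBuffer(fromBase64 chunk: String) -> AVAudioCompressedBuffer? {
    guard let mp3Data = Data(base64Encoded: chunk) else { return nil }

    // Compressed format description: MP3, 44.1 kHz, stereo (assumed).
    var asbd = AudioStreamBasicDescription(
        mSampleRate: 44100,
        mFormatID: kAudioFormatMPEGLayer3,
        mFormatFlags: 0,
        mBytesPerPacket: 0,       // variable packet size
        mFramesPerPacket: 1152,   // PCM frames per MP3 packet
        mBytesPerFrame: 0,
        mChannelsPerFrame: 2,
        mBitsPerChannel: 0,
        mReserved: 0
    )
    guard let mp3Format = AVAudioFormat(streamDescription: &asbd) else { return nil }

    let buffer = AVAudioCompressedBuffer(format: mp3Format,
                                         packetCapacity: 1,
                                         maximumPacketSize: mp3Data.count)
    mp3Data.withUnsafeBytes { raw in
        buffer.data.copyMemory(from: raw.baseAddress!, byteCount: mp3Data.count)
    }
    buffer.byteLength = UInt32(mp3Data.count)
    buffer.packetCount = 1
    // Variable-sized packets want a packet description for the single packet.
    buffer.packetDescriptions?.pointee = AudioStreamPacketDescription(
        mStartOffset: 0,
        mVariableFramesInPacket: 0,
        mDataByteSize: UInt32(mp3Data.count)
    )
    return buffer
}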
However, you then have a different problem, because AVAudioPlayerNode can't directly play compressed buffers. It can play back compressed audio files, so one option is to write the MP3 data out to a file (skipping AVAudioCompressedBuffer altogether) and play that file with AVAudioPlayerNode.
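If you do go the file route, a rough sketch (with made-up names, assuming the whole stream has already arrived as one Data value) could look like this:

import AVFoundation

// Sketch: dump the raw MP3 bytes to a temporary file and let AVAudioFile decode it.
func playMP3File(mp3Data: Data, engine: AVAudioEngine, player: AVAudioPlayerNode) throws {
    let url = FileManager.default.temporaryDirectory
        .appendingPathComponent("stream.mp3")          // hypothetical file name
    try mp3Data.write(to: url)

    // AVAudioFile decodes compressed files to PCM in its processingFormat as it is read.
    let file = try AVAudioFile(forReading: url)
    engine.connect(player, to: engine.mainMixerNode, format: file.processingFormat)  // player assumed already attached

    player.scheduleFile(file, at: nil, completionHandler: nil)
    if !engine.isRunning { try engine.start() }
    player.play()
}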
If the audio is a long-running stream, though, you probably don't want that. In that case you can use a single instance of AVAudioConverter to convert the MP3 AVAudioCompressedBuffers to AVAudioPCMBuffers, which you then pass to AVAudioPlayerNode as before.
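A rough sketch of that streaming path, reusing the hypothetical mp3Format/makeCompressedBuffer assumptions from above (the converter must be one long-lived instance so the MP3 decoder keeps its state between chunks):

import AVFoundation

final class MP3StreamDecoder {
    private let converter: AVAudioConverter
    private let outputFormat: AVAudioFormat

    // mp3Format: the compressed buffers' format; outputFormat: e.g. mainMixerNode.outputFormat(forBus: 0).
    init?(mp3Format: AVAudioFormat, outputFormat: AVAudioFormat) {
        guard let converter = AVAudioConverter(from: mp3Format, to: outputFormat) else { return nil }
        self.converter = converter
        self.outputFormat = outputFormat
    }

    // Decode one compressed buffer into a PCM buffer that can be scheduled on the player node.
    func decode(_ compressed: AVAudioCompressedBuffer) -> AVAudioPCMBuffer? {
        // Capacity guess: one MP3 packet is 1152 frames, scaled for any sample-rate change.
        let capacity = AVAudioFrameCount(1152.0 * outputFormat.sampleRate / compressed.format.sampleRate) + 1
        guard let pcm = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: capacity) else { return nil }

        var delivered = false
        var error: NSError?
        let status = converter.convert(to: pcm, error: &error) { _, outStatus in
            if delivered {
                outStatus.pointee = .noDataNow   // more chunks will arrive over the WebSocket
                return nil
            }
            delivered = true
            outStatus.pointee = .haveData
            return compressed
        }

        guard status != .error, error == nil else {
            NSLog("MP3 decode failed: \(String(describing: error))")
            return nil
        }
        return pcm
    }
}

The returned AVAudioPCMBuffer is already in outputFormat, so it can go straight into your existing addToBuffer(buffer:) / player.scheduleBuffer path.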