swift · base64 · avfoundation · avaudioplayer · audio-streaming

Playing base64 audio chunks in Swift


I'm receiving base64-encoded audio chunks over a WebSocket and storing them in an AVAudioPCMBuffer for playback with AVAudioEngine. However, when playback starts I hear noise instead of the expected sound. I suspect the problem is in my decodeAudioData function, but I'm not sure what exactly is going wrong.

Here is the full code:

import SwiftUI
import AVFoundation
import Combine

class WebSocketManager: ObservableObject {
    private var webSocketTask: URLSessionWebSocketTask?
    private let url = URL(string: "ws://localhost:6000/stream")!
    private let audioEngine = AVAudioEngine()
    private let player = AVAudioPlayerNode()
    private var hasStartedPlaying = false
    private let inputFormat: AVAudioFormat!
    private let outputFormat: AVAudioFormat
    
    private var buffersInQueue = 0
    
    init() {
        inputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 44100, channels: AVAudioChannelCount(2), interleaved: true)!
        outputFormat = audioEngine.mainMixerNode.outputFormat(forBus: 0)
        
        if !audioEngine.attachedNodes.contains(player) {
            audioEngine.attach(player)
        }
        audioEngine.connect(player, to: audioEngine.mainMixerNode, format: nil)
        audioEngine.prepare()
    }
    
    func connectAndPlay() {
        webSocketTask = URLSession.shared.webSocketTask(with: url)
        webSocketTask?.resume()
        
        if !audioEngine.isRunning {
            do {
                try audioEngine.start()
                print("🔊 Audio Engine Started")
            } catch {
                print("⚠️ Failed to start audio engine: \(error)")
                return
            }
        }
        
        receiveMessage()
    }
    
    private func closeConnection() {
        webSocketTask?.cancel()
        webSocketTask = nil
        print("WebSocket closed")
    }
    
    private func receiveMessage() {
        webSocketTask?.receive { [weak self] result in
            guard let self = self else { return }
            
            switch result {
            case .success(.string(let message)):
                print("Повідомлення", message.count)
                DispatchQueue.main.async {
                    if message == "END" {
                        self.closeConnection()
                        return
                    } else {
                        self.addBufferToQueue(chunk: message)
                        
                        if !self.hasStartedPlaying {
                            print("Playing")
                            self.playAudio()
                            self.hasStartedPlaying = true
                        }
                    }
                }
                self.receiveMessage()
            case .failure(let error):
                print("WebSocket Error: \(error)")
            default:
                print("Unknown")
            }
        }
    }
    
    func addBufferToQueue(chunk: String) {
        guard let audioBuffer = self.decodeAudioData(chunk) else {
          NSLog("Failed auido buffer")
          return
        }

        self.addToBuffer(buffer: audioBuffer)
    }
    
    private func addToBuffer(buffer: AVAudioPCMBuffer) {
        guard buffer.format.isEqual(outputFormat) else {
            NSLog("Format mismatch: \(buffer.format) vs \(String(describing: outputFormat))")
            return
        }
        
        self.buffersInQueue += 1

        player.scheduleBuffer(buffer) {
            DispatchQueue.main.async { [weak self] in
                guard let self else { return }

                self.buffersInQueue -= 1
                
                if self.buffersInQueue == 0 {
//                  self.onBufferFinished?()
                    print("Finished")
                }
            }
        }
    }
    
    private func decodeAudioData(_ base64String: String) -> AVAudioPCMBuffer? {
        guard let data = Data(base64Encoded: base64String) else {
            NSLog("Error decoding base64 data")
            return nil
        }
        
        guard let inputFormat = inputFormat else {
            NSLog("Error: Audio format is nil")
            return nil
        }
        
        let frameCount = UInt32(data.count) / inputFormat.streamDescription.pointee.mBytesPerFrame
        guard let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCount) else {
            NSLog("Error creating AVAudioPCMBuffer")
            return nil
        }
        
        print(frameCount, inputFormat.channelCount)
        
        inputBuffer.frameLength = frameCount
        data.withUnsafeBytes { (bufferPointer: UnsafeRawBufferPointer) in
            if let memory = bufferPointer.baseAddress?.assumingMemoryBound(to: Int16.self) {
                inputBuffer.int16ChannelData?.pointee.update(from: memory, count: Int(frameCount) * Int(inputFormat.channelCount))
            }
        }
        
        let sampleCount = Int(frameCount) * Int(inputFormat.channelCount)
        print("sampleCount:", sampleCount)
        
        guard let converter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
            NSLog("Error creating audio converter")
            return nil
        }
        
        let converterFrameCapacity = AVAudioFrameCount(outputFormat.sampleRate / inputFormat.sampleRate * Double(inputBuffer.frameCapacity))
        guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: converterFrameCapacity) else {
            NSLog("Error creating converted buffer")
            return nil
        }
        convertedBuffer.frameLength = convertedBuffer.frameCapacity
        
        var error: NSError?
        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
            outStatus.pointee = .haveData
            return inputBuffer
        }
        converter.convert(to: convertedBuffer, error: &error, withInputFrom: inputBlock)

        if let error = error {
            NSLog("Error during conversion: \(error)")
            return nil
        }
        
        print("Converted Buffer Frame Count: \(convertedBuffer.frameLength)")
        
        return convertedBuffer
    }
    
    func stopAudio() {
        player.stop()
        audioEngine.stop()
        closeConnection()
        hasStartedPlaying = false
    }
    
    private func playAudio() {
        if !audioEngine.isRunning {
            do {
                try audioEngine.start()
            } catch {
                print("⚠️ Failed to start audio engine: \(error)")
                return
            }
        }
        
        player.play()
    }
}

struct ContentView: View {
    @StateObject private var webSocketManager = WebSocketManager()
    
    var body: some View {
        VStack {
            Image(systemName: "globe")
                .imageScale(.large)
                .foregroundStyle(.tint)
            Text("Streaming Audio")
            Button("Play Audio") {
                webSocketManager.connectAndPlay()
            }
            Button("Stop Audio") {
                webSocketManager.stopAudio()
            }
        }
        .padding()
    }
}

#Preview {
    ContentView()
}


Solution

  • Your problem is that you are creating LPCM AVAudioPCMBuffers out of what is actually MP3 data (the format you specified in the comments). Interpreting MP3 data as LPCM will give you a cacophony.

    You should instead create AVAudioCompressedBuffers. But then you have a different problem: AVAudioPlayerNode can't play compressed buffers directly.

    It can play back compressed audio files, so one option is to write the MP3 data out to a file (skipping AVAudioCompressedBuffer altogether) and play that file with AVAudioPlayerNode; a sketch of this route follows below. If the audio is a long-running stream, though, you probably don't want to buffer it all into a file. In that case, use a single AVAudioConverter instance to decode the MP3 AVAudioCompressedBuffers into AVAudioPCMBuffers, which you then schedule on the AVAudioPlayerNode as before (second sketch below).
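
    Here is a minimal sketch of the file route, assuming you've accumulated the base64-decoded MP3 bytes of the stream into a single Data value; the function and temp-file names are made up for illustration:

    import AVFoundation

    // Sketch: write the accumulated MP3 bytes to a temp file and let
    // AVAudioFile do the decoding. `mp3Data` is assumed to hold raw
    // (already base64-decoded) MP3 bytes for a complete stream.
    func playMP3File(_ mp3Data: Data, engine: AVAudioEngine, player: AVAudioPlayerNode) throws {
        let url = FileManager.default.temporaryDirectory.appendingPathComponent("stream.mp3")
        try mp3Data.write(to: url)

        // AVAudioFile decodes the MP3 to PCM as you read it.
        let file = try AVAudioFile(forReading: url)
        engine.attach(player)
        engine.connect(player, to: engine.mainMixerNode, format: file.processingFormat)
        try engine.start()

        player.scheduleFile(file, at: nil)
        player.play()
    }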
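
    And a sketch of the streaming route with a long-lived AVAudioConverter. The stream parameters here (44.1 kHz, stereo, 1152 frames per packet) are assumptions you'll need to match to your actual stream, and treating each WebSocket chunk as a single packet run is a simplification; a robust implementation would parse packet boundaries first (e.g. with the AudioFileStream APIs):

    import AVFoundation

    // Sketch: decode raw MP3 chunks to PCM with one reusable AVAudioConverter.
    final class MP3ChunkDecoder {
        private let mp3Format: AVAudioFormat
        private let pcmFormat: AVAudioFormat
        private let converter: AVAudioConverter

        // `pcmFormat` should be the format the player is connected with,
        // e.g. audioEngine.mainMixerNode.outputFormat(forBus: 0).
        init?(pcmFormat: AVAudioFormat) {
            // Assumed stream parameters; adjust to what your server sends.
            var desc = AudioStreamBasicDescription(
                mSampleRate: 44100,
                mFormatID: kAudioFormatMPEGLayer3,
                mFormatFlags: 0,
                mBytesPerPacket: 0,       // 0 = variable packet size
                mFramesPerPacket: 1152,   // PCM frames per MP3 packet
                mBytesPerFrame: 0,
                mChannelsPerFrame: 2,
                mBitsPerChannel: 0,
                mReserved: 0)
            guard let mp3Format = AVAudioFormat(streamDescription: &desc),
                  let converter = AVAudioConverter(from: mp3Format, to: pcmFormat) else {
                return nil
            }
            self.mp3Format = mp3Format
            self.pcmFormat = pcmFormat
            self.converter = converter
        }

        func decode(_ mp3Data: Data) -> AVAudioPCMBuffer? {
            // Wrap the raw bytes in a compressed buffer. Treating the whole
            // chunk as one packet is the simplification mentioned above.
            let compressed = AVAudioCompressedBuffer(format: mp3Format,
                                                     packetCapacity: 1,
                                                     maximumPacketSize: mp3Data.count)
            mp3Data.withUnsafeBytes { raw in
                compressed.data.copyMemory(from: raw.baseAddress!, byteCount: raw.count)
            }
            compressed.byteLength = UInt32(mp3Data.count)
            compressed.packetCount = 1

            // Room for up to one second of output; convert() sets frameLength.
            guard let pcm = AVAudioPCMBuffer(pcmFormat: pcmFormat,
                                             frameCapacity: AVAudioFrameCount(pcmFormat.sampleRate)) else {
                return nil
            }

            var error: NSError?
            var consumed = false
            let status = converter.convert(to: pcm, error: &error) { _, outStatus in
                if consumed {
                    outStatus.pointee = .noDataNow  // hand each chunk over only once
                    return nil
                }
                consumed = true
                outStatus.pointee = .haveData
                return compressed
            }
            if status == .error {
                NSLog("MP3 decode failed: \(error?.localizedDescription ?? "unknown")")
                return nil
            }
            return pcm
        }
    }

    Each buffer returned by decode(_:) can then be scheduled on the player exactly as your addToBuffer(buffer:) does now.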