ios · swift · avfoundation · avassetwriter

AVAssetWriter Video Output Does Not Play Appended Audio


I have an AVAssetWriter recording a video with an applied filter, which I then play back via AVQueuePlayer.

My issue is that the audio data output does append its sample buffers to the asset writer's audio input, but no sound plays during playback. I have not come across any existing solutions and would appreciate any guidance.

Secondarily, my .AVPlayerItemDidPlayToEndTime notification observer, which I use to loop the playback, does not fire either.

AVCaptureSession Setup

func setupSession() {
    
    let session = AVCaptureSession()
    session.sessionPreset = .medium
    
    guard
        let camera = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front),
        let mic = AVCaptureDevice.default(.builtInMicrophone, for: .audio, position: .unspecified),
        let videoInput = try? AVCaptureDeviceInput(device: camera),
        let audioInput = try? AVCaptureDeviceInput(device: mic),
        session.canAddInput(videoInput), session.canAddInput(audioInput) else { return }
    
            
    let videoOutput = AVCaptureVideoDataOutput()
    let audioOutput = AVCaptureAudioDataOutput()
    guard session.canAddOutput(videoOutput), session.canAddOutput(audioOutput) else { return }
    let queue = DispatchQueue(label: "recordingQueue", qos: .userInteractive)
    videoOutput.setSampleBufferDelegate(self, queue: queue)
    audioOutput.setSampleBufferDelegate(self, queue: queue)
    
    session.beginConfiguration()
    
    session.addInput(videoInput)
    session.addInput(audioInput)
    session.addOutput(videoOutput)
    session.addOutput(audioOutput)
    
    session.commitConfiguration()
            
    if let connection = videoOutput.connection(with: AVMediaType.video) {
        if connection.isVideoStabilizationSupported { connection.preferredVideoStabilizationMode = .auto }
        connection.isVideoMirrored = true
        connection.videoOrientation = .portrait
    }
    
    _videoOutput = videoOutput
    _audioOutput = audioOutput
    _captureSession = session
    
    DispatchQueue.global(qos: .default).async { session.startRunning() }
}

AVAssetWriter Setup + didOutput Delegate

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {

    let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer).seconds

    if output == _videoOutput {
        if connection.isVideoOrientationSupported { connection.videoOrientation = .portrait }

        guard let cvImageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        let ciImage = CIImage(cvImageBuffer: cvImageBuffer)

        guard let filteredCIImage = applyFilters(inputImage: ciImage) else { return }
        self.ciImage = filteredCIImage

        guard let cvPixelBuffer = getCVPixelBuffer(from: filteredCIImage) else { return }
        self.cvPixelBuffer = cvPixelBuffer

        self.ciContext.render(filteredCIImage, to: cvPixelBuffer, bounds: filteredCIImage.extent, colorSpace: CGColorSpaceCreateDeviceRGB())

        metalView.draw()
    }
            
    switch _captureState {
    case .start:
        
        guard let outputUrl = tempURL else { return }
        
        let writer = try! AVAssetWriter(outputURL: outputUrl, fileType: .mp4)
        
        let videoSettings = _videoOutput!.recommendedVideoSettingsForAssetWriter(writingTo: .mp4)
        let videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoSettings)
        videoInput.mediaTimeScale = CMTimeScale(bitPattern: 600)
        videoInput.expectsMediaDataInRealTime = true
        
        let pixelBufferAttributes = [
            kCVPixelBufferCGImageCompatibilityKey: NSNumber(value: true),
            kCVPixelBufferCGBitmapContextCompatibilityKey: NSNumber(value: true),
            kCVPixelBufferPixelFormatTypeKey: NSNumber(value: Int32(kCVPixelFormatType_32ARGB))
        ] as [String:Any]
        
        let adapter = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoInput, sourcePixelBufferAttributes: pixelBufferAttributes)
        if writer.canAdd(videoInput) { writer.add(videoInput) }
                                
        let audioSettings = _audioOutput!.recommendedAudioSettingsForAssetWriter(writingTo: .mp4) as? [String:Any]
        let audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: audioSettings)
        audioInput.expectsMediaDataInRealTime = true
        if writer.canAdd(audioInput) { writer.add(audioInput) }
    
        _filename = outputUrl.absoluteString
        _assetWriter = writer
        _assetWriterVideoInput = videoInput
        _assetWriterAudioInput = audioInput
        _adapter = adapter
        _captureState = .capturing
        _time = timestamp
                    
        writer.startWriting()
        writer.startSession(atSourceTime: .zero)
        
    case .capturing:
        
        if output == _videoOutput {
            if _assetWriterVideoInput?.isReadyForMoreMediaData == true {
                let time = CMTime(seconds: timestamp - _time, preferredTimescale: CMTimeScale(600))
                _adapter?.append(self.cvPixelBuffer, withPresentationTime: time)
            }
        } else if output == _audioOutput {
            if _assetWriterAudioInput?.isReadyForMoreMediaData == true {
                _assetWriterAudioInput?.append(sampleBuffer)
            }
        }
        break
        
    case .end:
        
        guard _assetWriterVideoInput?.isReadyForMoreMediaData == true, _assetWriter!.status != .failed else { break }
        
        _assetWriterVideoInput?.markAsFinished()
        _assetWriterAudioInput?.markAsFinished()
        _assetWriter?.finishWriting { [weak self] in
            
            guard let output = self?._assetWriter?.outputURL else { return }
            
            self?._captureState = .idle
            self?._assetWriter = nil
            self?._assetWriterVideoInput = nil
            self?._assetWriterAudioInput = nil
            
            
            self?.previewRecordedVideo(with: output)
        }
        
    default:
        break
    }
}

Solution

  • Start your timeline at the presentation timestamp of the first audio or video sample buffer that you encounter:

    writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))

    Previously you started the timeline at zero, but the captured sample buffers carry timestamps that usually seem to be relative to the amount of time since system boot, so there's a large, undesired gap between when your file "starts" (the sourceTime you give AVAssetWriter) and when the video and audio actually appear.

    Your question doesn't say that the video fails to play, and I'd half expect some players to simply skip over the long stretch of nothing to the point in the timeline where your samples begin, but either way the file's timeline is wrong.
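
As a rough sketch of what that change looks like in your delegate (reusing the property names from your code, with the rest of the writer/input/adaptor setup unchanged and omitted), the .start and .capturing cases could become something like the following. Once the session starts at the first buffer's timestamp, you can also append video frames with each buffer's own presentation timestamp instead of rebasing against _time:

case .start:

    guard let outputUrl = tempURL else { return }

    let writer = try! AVAssetWriter(outputURL: outputUrl, fileType: .mp4)
    // ... create and add the video/audio inputs and the pixel buffer adaptor exactly as before ...

    _assetWriter = writer
    _captureState = .capturing

    writer.startWriting()
    // Anchor the file's timeline to the first sample buffer's timestamp instead of .zero.
    writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))

case .capturing:

    if output == _videoOutput, _assetWriterVideoInput?.isReadyForMoreMediaData == true {
        // The session now starts on the capture clock, so the buffer's own
        // presentation timestamp can be used directly; no offset against _time needed.
        _adapter?.append(self.cvPixelBuffer, withPresentationTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
    } else if output == _audioOutput, _assetWriterAudioInput?.isReadyForMoreMediaData == true {
        _assetWriterAudioInput?.append(sampleBuffer)
    }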