iosswiftavfoundationavassetwriter

AVAssetWritter First Frames are either Blank or black


Problem: I am recording video frames by getting both audio and video buffers from CMSampleBuffer. Once the AssetWriter has finished writing the buffers, the final video results in first frame being black or either blank(considering it only considers the audio frames in the beginning). Although, randomly the video comes out totally normal and doesnt have a black frame.

What I tried: I tried to wait until I fetch the first video frame and then start recording. Yet I get the same erratic performance.

What I want: A proper video with no blank frames.

Below is the code that might help.

Capture Session

 func configureSession() {
    sessionQueue.async {
        print("SFC - Session Configuring")
        if self.setupResult != .success { return }
        
        self.session.beginConfiguration()
        self.session.sessionPreset = .high
        
        do {
            var defaultVideoDevice: AVCaptureDevice?

            if let frontCameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front){
                defaultVideoDevice = frontCameraDevice
            } else if let backCameraDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back){
                defaultVideoDevice = backCameraDevice
            } else if let dualCameraDevice = AVCaptureDevice.default(.builtInDualWideCamera, for: .video, position: .back) {
                defaultVideoDevice = dualCameraDevice
            }

            guard let videoDevice = defaultVideoDevice else {
                print("CAM - Camera unavailable")
                self.setupResult = .configurationFailed
                self.session.commitConfiguration()
                return
            }

            let videoInputDevice = try AVCaptureDeviceInput(device: videoDevice)

            if self.session.canAddInput(videoInputDevice) {
                self.session.addInput(videoInputDevice)
                self.videoDeviceInput = videoInputDevice
            } else {
                print("CAM - Couldn't add input to the session")
                self.setupResult = .configurationFailed
                self.session.commitConfiguration()
                return
            }
        } catch {
            print("CAM - Couldn't create device input. Error - ", error.localizedDescription)
            self.setupResult = .configurationFailed
            self.session.commitConfiguration()
            return
        }
        
        
        self.videoOutput.setSampleBufferDelegate(self, queue: self.videoQueue)
        if self.session.canAddOutput(self.videoOutput) {
            self.session.addOutput(self.videoOutput)
            self.photoQualityPrioritizationMode = .balanced
        } else {
            print("Could not add photo output to the session")
            self.setupResult = .configurationFailed
            self.session.commitConfiguration()
            return
        }

        self.videoOutput.connections.first?.videoOrientation = .portrait
        self.videoOutput.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA ]
        self.videoOutput.alwaysDiscardsLateVideoFrames = true
        
        
        // Configure for photos
        if self.session.canAddOutput(self.photoOutput) {
            self.session.addOutput(self.photoOutput)
        }
        
        do {
            let audioDevice = AVCaptureDevice.default(for: .audio)
            let audioDeviceInput = try AVCaptureDeviceInput(device: audioDevice!)
            if self.session.canAddInput(audioDeviceInput) {
                self.session.addInput(audioDeviceInput)
            } else { print("CAM - Couldn't add audio input device to session.") }
        } catch { print("couldn't create audio input device. Error - ",error.localizedDescription) }
    
        
        self.audioOutput.setSampleBufferDelegate(self, queue: self.videoQueue)
        if self.session.canAddOutput(self.audioOutput) {
            print("SFC - registered audio output with type")
            self.session.addOutput(self.audioOutput)
        } else {
            print("Couldn't add audio output")
            self.setupResult = .configurationFailed
            self.session.commitConfiguration()
        }
    
        self.session.commitConfiguration()

    }
}

func startSession() {
    if SMConstants.currentDevice.isSimulator { return }
    sessionQueue.async {
        self.configureSession()
        print("SFC - Frame Buffers Session Starting")
        self.session.startRunning()
        self.isSessionRunning = self.session.isRunning
        self.sessionQueue.asyncAfter(deadline: .now() + 1) {
            self.addObservers()
        }
    }
}

Buffer Writer

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    if !frameCaptureRunning { return }
    write(output: output, buffer: sampleBuffer)
}

public func write(output: AVCaptureOutput, buffer: CMSampleBuffer) {
    writerQueue.sync {
        
        if assetWriter == nil { self.setupWriter() }

        if self.assetWriter?.status == .unknown {
            self.assetWriter?.startWriting()
            self.assetWriter?.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(buffer))
            printDone(item: "Started AssetWriter")
        }
        if self.assetWriter?.status == .failed {
            printError(item: "Asset Writer Failed with Error: \(String(describing: self.assetWriter?.error))")
            return
        }

    
        if CMSampleBufferDataIsReady(buffer) {
            if output == videoOutput {
                if let videoInput = self.videoInput, videoInput.isReadyForMoreMediaData {
                    videoInput.append(buffer)
                    printLog(item: "🌠 Pulling video only \(output)")
                    video_frames_written = true
                }
            }
            
            if output == audioOutput {
                if !video_frames_written { return }
                if let audioInput = self.audioInput, audioInput.isReadyForMoreMediaData {
                    audioInput.append(buffer)
                    printLog(item: "💥 Pulling Audio only \(output)")
                }
            }
        }
    }
}

private func setupWriter() {
    
    clearAndResetFilesInDirectory()
    
    self.assetWriter = try? AVAssetWriter(outputURL: self.url, fileType: AVFileType.mp4)
    
    let videoOutputSettings = [
        AVVideoCodecKey: AVVideoCodecType.h264,
        AVVideoHeightKey: 1920,
        AVVideoWidthKey:1080
    ] as [String : Any]


    self.videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoOutputSettings)
    self.videoInput?.expectsMediaDataInRealTime = true
    if let videoInput = self.videoInput, (self.assetWriter?.canAdd(videoInput))! {
        self.assetWriter?.add(videoInput)
    }
    
    
    let audioOutputSettings = [
        AVFormatIDKey: kAudioFormatMPEG4AAC,
        AVNumberOfChannelsKey: 1,
        AVSampleRateKey: 44100,
        AVEncoderBitRateKey: 64000
    ] as [String: Any]
    
    
    self.audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: audioOutputSettings)
    self.audioInput?.expectsMediaDataInRealTime = true
    if let audioInput = self.audioInput, (self.assetWriter?.canAdd(audioInput))! {
        printDone(item: "Added Input")
        self.assetWriter?.add(audioInput)
    } else { printError(item: "No audio Input") }
    
    
}

ViewWillAppear in CameraController

override func viewWillAppear(_ animated: Bool) {
    super.viewWillAppear(animated)
    frameCapture = FrameCapture(filename: "test.mp4", delegate: self)
    frameCapture.startSession()
    previewView.session = frameCapture.session
}

This is all I feel is apt for your idea. If you think I need to provide more info, please let me know. I'll be prompt. Thanks for your time.


Solution

  • You probably want to startSession on a video buffer - if an audio buffer arrives first, with an earlier timestamp than the first video buffer, then you'll get blank or black initial frames.