I'm currently trying to implement Firebase's ML Kit to use text recognition.
So far, I've got the code for the camera, which shows its live feed inside a UIView. My intention is now to recognize text in this live feed, which I reckon is possible with the help of a CMSampleBufferRef (let image = VisionImage(buffer: bufferRef) - see the linked Firebase tutorial, Step 2).
How can I create such a CMSampleBufferRef and make it hold the live feed of the camera (the UIView)?
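For reference, here is roughly the recognition call I'm trying to feed, pieced together from the tutorial (I may not have the exact API names right, and the sampleBuffer parameter is exactly the part I don't know how to obtain):

import AVFoundation
import FirebaseMLVision

// Roughly what I'm aiming for, based on the tutorial; sampleBuffer is the missing piece
let textRecognizer = Vision.vision().onDeviceTextRecognizer()

func recognizeText(in sampleBuffer: CMSampleBuffer) {
    let image = VisionImage(buffer: sampleBuffer)
    textRecognizer.process(image) { result, error in
        guard error == nil, let result = result else { return }
        print(result.text) // the recognized text
    }
}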
My code for the camera:
@IBOutlet weak var cameraView: UIView!

var session: AVCaptureSession?
var device: AVCaptureDevice?
var input: AVCaptureDeviceInput?
var output: AVCaptureMetadataOutput?
var prevLayer: AVCaptureVideoPreviewLayer?

override func viewDidLoad() {
    super.viewDidLoad()
    createSession()
    prevLayer?.frame.size = cameraView.frame.size
}

func createSession() {
    session = AVCaptureSession()
    device = AVCaptureDevice.default(for: AVMediaType.video)

    do {
        input = try AVCaptureDeviceInput(device: device!)
    } catch {
        print(error)
    }

    if let input = input {
        session?.addInput(input)
    }

    prevLayer = AVCaptureVideoPreviewLayer(session: session!)
    prevLayer?.frame.size = cameraView.frame.size
    prevLayer?.videoGravity = AVLayerVideoGravity.resizeAspectFill
    prevLayer?.connection?.videoOrientation = transformOrientation(orientation: UIApplication.shared.statusBarOrientation)
    cameraView.layer.addSublayer(prevLayer!)

    session?.startRunning()
}

func cameraWithPosition(position: AVCaptureDevice.Position) -> AVCaptureDevice? {
    let deviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInDualCamera, .builtInTelephotoCamera, .builtInTrueDepthCamera, .builtInWideAngleCamera], mediaType: .video, position: position)
    return deviceDiscoverySession.devices.first
}

override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
    coordinator.animate(alongsideTransition: { _ in
        self.prevLayer?.connection?.videoOrientation = self.transformOrientation(orientation: UIApplication.shared.statusBarOrientation)
        self.prevLayer?.frame.size = self.cameraView.frame.size
    }, completion: nil)
    super.viewWillTransition(to: size, with: coordinator)
}

func transformOrientation(orientation: UIInterfaceOrientation) -> AVCaptureVideoOrientation {
    switch orientation {
    case .landscapeLeft:
        return .landscapeLeft
    case .landscapeRight:
        return .landscapeRight
    case .portraitUpsideDown:
        return .portraitUpsideDown
    default:
        return .portrait
    }
}
Edit: I have added a functional Swift sample matching your language requirement:
import UIKit
import AVFoundation

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {

    @IBOutlet weak var cameraView: UIView!

    var session: AVCaptureSession!
    var device: AVCaptureDevice?
    var input: AVCaptureDeviceInput?
    var videoOutput: AVCaptureVideoDataOutput!
    var output: AVCaptureMetadataOutput?
    var prevLayer: AVCaptureVideoPreviewLayer!

    override func viewDidLoad() {
        super.viewDidLoad()

        session = AVCaptureSession()
        device = AVCaptureDevice.default(for: AVMediaType.video)

        do {
            input = try AVCaptureDeviceInput(device: device!)
        } catch {
            print(error)
            return
        }

        if let input = input {
            if session.canAddInput(input) {
                session.addInput(input)
            }
        }

        // The video data output is what delivers the CMSampleBuffer frames
        videoOutput = AVCaptureVideoDataOutput()
        videoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
        ]
        videoOutput.alwaysDiscardsLateVideoFrames = true

        // Sample buffers are delivered on this background queue
        let queue = DispatchQueue(label: "video-frame-sampler")
        videoOutput.setSampleBufferDelegate(self, queue: queue)

        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
            if let connection = videoOutput.connection(with: .video) {
                connection.videoOrientation = videoOrientationFromInterfaceOrientation()
                if connection.isVideoStabilizationSupported {
                    connection.preferredVideoStabilizationMode = .auto
                }
            }
        }

        prevLayer = AVCaptureVideoPreviewLayer(session: session)
        prevLayer.frame.size = cameraView.frame.size
        prevLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
        cameraView.layer.addSublayer(prevLayer)

        session.startRunning()
    }

    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // pass your sampleBuffer to the Vision API (see the sketch below)
        // I recommend not passing every frame, however; skip some frames until the camera is steady and focused
        print("frame received")
    }

    func videoOrientationFromInterfaceOrientation() -> AVCaptureVideoOrientation {
        // UIInterfaceOrientation and AVCaptureVideoOrientation use matching raw values for the same-named cases;
        // fall back to portrait if the orientation is unknown
        return AVCaptureVideoOrientation(rawValue: UIApplication.shared.statusBarOrientation.rawValue) ?? .portrait
    }
}
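To connect this to ML Kit, the captureOutput stub above could hand the buffer to the text recognizer roughly like this. Treat it as a sketch only: it assumes the FirebaseMLVision pod, and both the textRecognizer property and the .topLeft orientation are assumptions you will need to adapt to your setup:

import FirebaseMLVision // add at the top of the file

// Assumed property on the view controller; create the recognizer once and reuse it.
lazy var textRecognizer = Vision.vision().onDeviceTextRecognizer()

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    let visionImage = VisionImage(buffer: sampleBuffer)

    // ML Kit needs to know how the buffer is oriented; .topLeft assumes portrait with the
    // connection's videoOrientation set as in viewDidLoad. Adjust this for your device orientation.
    let metadata = VisionImageMetadata()
    metadata.orientation = .topLeft
    visionImage.metadata = metadata

    textRecognizer.process(visionImage) { result, error in
        guard error == nil, let result = result else { return }
        print(result.text) // recognized text for this frame
    }
}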
I see that you have already set up your input and preview layer, but you also need to set up a video capture output to capture your CMSampleBufferRef frames.
To do this, set up an object of type AVCaptureVideoDataOutput with the following steps:
Create an instance of AVCaptureVideoDataOutput and configure it:
AVCaptureVideoDataOutput *videoOutput = [AVCaptureVideoDataOutput new];
videoOutput.videoSettings = @{ (id)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA) };
videoOutput.alwaysDiscardsLateVideoFrames = YES;
Set the sample buffer delegate of the configured output and add the output to the session:
dispatch_queue_t queue = dispatch_queue_create("video-frame-sampler", DISPATCH_QUEUE_SERIAL);
[videoOutput setSampleBufferDelegate:self queue:queue];

if ([self.session canAddOutput:videoOutput]) {
    [self.session addOutput:videoOutput];
    AVCaptureConnection *connection = [videoOutput connectionWithMediaType:AVMediaTypeVideo];
    connection.videoOrientation = [self videoOrientationFromDeviceOrientation];
    if (connection.supportsVideoStabilization) {
        connection.preferredVideoStabilizationMode = AVCaptureVideoStabilizationModeAuto;
    }
}
Implement the captureOutput:didOutputSampleBuffer:fromConnection: method, where you will receive your required CMSampleBufferRef:
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    // pass your sampleBuffer to the Vision API
    // I recommend not passing every frame, however; skip some frames until the camera is steady and focused (see the Swift sketch below)
}
I'm a plain old Objective-C developer, but you can easily convert the code to Swift as per your need.
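For instance, the frame-skipping advice from the comment above could look like this in Swift; the frameCounter property and the interval of 10 are hypothetical values you would tune yourself:

private var frameCounter = 0
private let processEveryNthFrame = 10 // hypothetical interval; tune it to your needs

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    frameCounter += 1
    guard frameCounter % processEveryNthFrame == 0 else { return } // drop most frames
    // hand this sampleBuffer to the Vision API (see the sketch after the Swift sample above)
}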
Additionally, here is the code for the videoOrientationFromDeviceOrientation method:
- (AVCaptureVideoOrientation)videoOrientationFromDeviceOrientation {
    UIDeviceOrientation orientation = [UIDevice currentDevice].orientation;
    // Device and video orientations share raw values for portrait and upside-down,
    // but landscape left/right are swapped between the two enums.
    AVCaptureVideoOrientation result = (AVCaptureVideoOrientation)orientation;
    if (orientation == UIDeviceOrientationLandscapeLeft) {
        result = AVCaptureVideoOrientationLandscapeRight;
    } else if (orientation == UIDeviceOrientationLandscapeRight) {
        result = AVCaptureVideoOrientationLandscapeLeft;
    } else if (!UIDeviceOrientationIsValidInterfaceOrientation(orientation)) {
        result = AVCaptureVideoOrientationPortrait; // face up/down/unknown: fall back to portrait
    }
    return result;
}
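If you want to keep that helper in Swift as well, a direct translation could look like this (it mirrors the Objective-C logic above, falling back to portrait for face up/down/unknown):

func videoOrientationFromDeviceOrientation() -> AVCaptureVideoOrientation {
    // Device and video orientations share raw values for portrait and upside-down,
    // but landscape left/right are swapped between the two enums.
    switch UIDevice.current.orientation {
    case .landscapeLeft:
        return .landscapeRight
    case .landscapeRight:
        return .landscapeLeft
    case .portraitUpsideDown:
        return .portraitUpsideDown
    default:
        return .portrait // face up/down/unknown: fall back to portrait
    }
}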