I have this simple function to detect faces in an image:
func detectFacesForImage(image: UIImage)
{
    guard let ciImage = CIImage(image: image) else {
        return
    }
    let request = VNDetectFaceRectanglesRequest { [unowned self] request, error in
        guard let observations = request.results as? [VNFaceObservation] else {
            return
        }
        // Hand the detected faces over to the conversion step below.
        self.convertFaceObservationsToDetectedObjects(with: observations)
    }
    let handler = VNImageRequestHandler(ciImage: ciImage, options: [:])
    do
    {
        try handler.perform([request])
    }
    catch
    {
        print(error) // handle the error here
    }
}
Now we have an observations list containing VNFaceObservation objects. I'm using the following function to convert these objects to VNDetectedObjectObservation objects:
func convertFaceObservationsToDetectedObjects(with observations: [VNFaceObservation])
{
    observations.enumerated().forEach { (tag, observation) in
        // tag is simply the index of the face here; the original tagging scheme is not shown.
        // Vision bounding boxes are normalized with the origin at the bottom-left,
        // so scale them to the image view and flip the Y axis.
        let boundingBox = observation.boundingBox
        let size = CGSize(width: boundingBox.width * self.IMG_VIEW.bounds.width,
                          height: boundingBox.height * self.IMG_VIEW.bounds.height)
        let origin = CGPoint(x: boundingBox.minX * self.IMG_VIEW.bounds.width,
                             y: (1 - boundingBox.minY) * self.IMG_VIEW.bounds.height - size.height)
        let originalRect = CGRect(origin: origin, size: size)
        // Convert the view-space rect into the preview layer's normalized
        // metadata output space, then flip Y again for Vision.
        var convertedRect = cameraLayer.metadataOutputRectConverted(fromLayerRect: originalRect)
        convertedRect.origin.y = 1 - convertedRect.origin.y
        let trackingObservation = VNDetectedObjectObservation(boundingBox: convertedRect)
        self.anotherListOfObservations.append((tag, trackingObservation))
    }
}
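To later draw the boxes that come back from tracking, the reverse conversion is the preview layer's layerRectConverted(fromMetadataOutputRect:). A small sketch, assuming cameraLayer is the AVCaptureVideoPreviewLayer used above and the usual bottom-left/top-left flip between Vision and metadata-output coordinates:

// Sketch: map a tracked observation's normalized bounding box back into
// cameraLayer coordinates so it can be drawn over the preview.
func layerRect(for observation: VNDetectedObjectObservation) -> CGRect
{
    var normalizedRect = observation.boundingBox
    // Vision uses a bottom-left origin; metadata output rects use a top-left origin.
    normalizedRect.origin.y = 1 - normalizedRect.origin.y - normalizedRect.height
    return cameraLayer.layerRectConverted(fromMetadataOutputRect: normalizedRect)
}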
Then I'm using this delegate function to try to track the converted VNDetectedObjectObservation objects:
extension MyViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection)
    {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }
        var listOfRequests: [VNTrackObjectRequest] = []
        for (_, observation) in self.anotherListOfObservations
        {
            let request = VNTrackObjectRequest(detectedObjectObservation: observation) { [unowned self] request, error in
                self.handle(request, error: error)
            }
            request.trackingLevel = .accurate
            listOfRequests.append(request)
        }
        do {
            // handler is a VNSequenceRequestHandler stored on the view controller and
            // reused across frames; perform(_:on:) is not available on VNImageRequestHandler.
            try handler.perform(listOfRequests, on: pixelBuffer)
        }
        catch {
            print(error)
        }
    }
}
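For tracking to advance from frame to frame, each VNTrackObjectRequest has to be seeded with the most recent observation, so the completion handler should write the tracker's output back into anotherListOfObservations. A minimal sketch of handle(_:error:), assuming the stored tuples are mutable and matched by the observation's uuid, which the tracker carries over from the observation it was seeded with:

func handle(_ request: VNRequest, error: Error?)
{
    guard let newObservation = request.results?.first as? VNDetectedObjectObservation else {
        return
    }
    // The completion handler runs during perform(_:on:), on the same queue as
    // captureOutput, so it is safe to update the list here.
    if let index = self.anotherListOfObservations.firstIndex(where: { $0.1.uuid == newObservation.uuid }) {
        self.anotherListOfObservations[index].1 = newObservation
    }
}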
My question is: is this really possible, or am I doing it all wrong?
The best solution I have found so far is to use the latest Vision framework to produce face features in real time.
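For completeness, a minimal sketch of that approach: running VNDetectFaceLandmarksRequest on every camera frame through a VNSequenceRequestHandler. Here sequenceHandler and detectFaceFeatures(on:) are assumed names, and the pixel buffer would come from captureOutput as above.

import Vision
import CoreVideo

let sequenceHandler = VNSequenceRequestHandler() // reused across frames

func detectFaceFeatures(on pixelBuffer: CVPixelBuffer)
{
    let request = VNDetectFaceLandmarksRequest { request, error in
        guard let faces = request.results as? [VNFaceObservation] else {
            return
        }
        for face in faces {
            // boundingBox is normalized to the image; landmark points (eyes, nose,
            // face contour, ...) are normalized within that bounding box.
            print(face.boundingBox, face.landmarks?.allPoints?.pointCount ?? 0)
        }
    }
    do {
        try sequenceHandler.perform([request], on: pixelBuffer)
    }
    catch {
        print(error)
    }
}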