python opencv face-recognition imutils

cv2 capture video with minimized window


I have a small Python script that uses cv2 to capture the first face detected and display only that region in a cv2 window. Everything works great.

Currently, the video feed freezes when the window is minimized. How can I keep my script capturing video while the cv2 window is minimized to the tray?
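
To clarify what I am after, the direction I have in mind is roughly the one below: read frames on a worker thread so that capture never depends on the window state, and let the GUI loop only display the latest frame. This is a minimal sketch, not my actual script, and the names (capture_loop, latest) are just illustrative:

    import threading
    import cv2

    latest = {"frame": None}  # shared slot holding the newest frame
    running = True

    def capture_loop():
        # grab frames continuously; this thread never touches the GUI,
        # so minimizing the window should not stall the capture
        cap = cv2.VideoCapture(0)
        while running:
            ok, frame = cap.read()
            if ok:
                latest["frame"] = frame
        cap.release()

    threading.Thread(target=capture_loop, daemon=True).start()

    while True:
        if latest["frame"] is not None:
            cv2.imshow("Frame", latest["frame"])  # display only
        if cv2.waitKey(1) & 0xFF == ord("q"):
            running = False
            break
    cv2.destroyAllWindows()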

EDIT

I would also like to know if there is a better approach that would reduce the load on the CPU. Currently, running this script uses 14-20% of my CPU.
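
One idea I have been considering for the CPU question (a sketch only, assuming the face_recognition detection call is what dominates the load; DETECT_EVERY is a made-up knob) is to run detection every Nth frame and reuse the last boxes in between:

    from imutils.video import VideoStream
    import face_recognition
    import imutils
    import cv2

    DETECT_EVERY = 5  # hypothetical interval; trades CPU against latency
    vs = VideoStream(0).start()
    frame_idx, last_boxes = 0, []

    while True:
        frame = vs.read()
        if frame is None:  # stream may not be warmed up yet
            continue
        # only pay for detection on every Nth frame
        if frame_idx % DETECT_EVERY == 0:
            small = imutils.resize(frame, width=240)
            last_boxes = face_recognition.face_locations(small)
        frame_idx += 1
        # ...crop/draw using last_boxes here, as in the full script below...
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cv2.destroyAllWindows()
    vs.stop()

Here is my current script: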

from __future__ import division
from imutils.video import VideoStream
import face_recognition
import imutils
import cv2

POINTS = []


def landmarkTrackSmoothing(box, factor, maxPoints=30):
    # face_recognition returns each box as (top, right, bottom, left)
    top, right, bottom, left = box[0]

    # keep a rolling window of the last maxPoints boxes
    if len(POINTS) >= maxPoints:
        del POINTS[0]
    POINTS.append([top, right, bottom, left])

    # average the window, then divide by factor to map the downscaled
    # detection coordinates back onto the full-resolution frame
    mean = [int((sum(col) / len(col)) / factor) for col in zip(*POINTS)]
    return mean


def cartoonFilter(roi):
    # 1) Edges
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 5)
    edges = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)

    # 2) Color
    color = cv2.bilateralFilter(roi, 9, 300, 300)

    # 3) Cartoon
    return cv2.bitwise_and(color, color, mask=edges)


def OpenCamera():
    vs = VideoStream(0 + cv2.CAP_DSHOW, framerate=120).start()
    vs.stream.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    vs.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)
    roi = [0, 0, 0, 0]
    prev = [0, 0, 0, 0]

    # Add filter flags
    cartoonEffect = False

    # loop over frames from the video file stream
    while True:
        # grab the frame from the threaded video stream
        frame = vs.read()

        # downscale and convert to grayscale for fast processing
        # of landmark locations
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = imutils.resize(gray, width=240)

        # calculate upscale factor for landmark locations
        factor = float(gray.shape[1]) / frame.shape[1]

        # detect the (x, y)-coordinates of the bounding boxes
        # corresponding to each face in the input frame, then
        # the facial embeddings for each face
        boxes = face_recognition.face_locations(gray)
        box = list(map(list, boxes))

        # smooth and upscale the first detected face's coordinates
        if len(box) > 0:
            box = [landmarkTrackSmoothing(box, factor)]

        # crop the frame to the first recognized face
        if len(box) > 0:
            top, right, bottom, left = box[0]
            roi = frame[top:bottom, left:right]
            prev = top, bottom, left, right
            if cartoonEffect:
                roi = cartoonFilter(roi)

        # if no face was found this frame, fall back to the previous
        # face region (or the full frame if we never had one)
        if len(box) == 0:
            if prev[0] > 0:
                roi = frame[prev[0]:prev[1], prev[2]:prev[3]]
            else:
                roi = frame

        cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)
        if (roi.any()):
            cv2.imshow("Frame", roi)
        cv2.resizeWindow("Frame", 512, 512)

        # poll for key presses; more dynamic key commands for filters can be added here
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break
        if key == ord('c'):
            # toggle the cartoon filter
            cartoonEffect = not cartoonEffect

    # do a bit of cleanup on quit
    cv2.destroyAllWindows()
    vs.stop()


# Begin capturing
OpenCamera()

Solution

  • I managed to reduce my CPU usage to 1-10% by switching to a Haar cascade for detection plus a dlib correlation tracker, instead of what I was doing. I am still not able to keep capturing video while the window is minimized.

    Final solution:

    import cv2
    import dlib
    import time
    POINTS = []
    
    
    def followFaceSmoothing(roi, maxPoints=30):
        # roi comes in as (top, bottom, left, right)
        top, bottom, left, right = roi

        # keep a rolling window of the last maxPoints boxes
        if len(POINTS) >= maxPoints:
            del POINTS[0]
        POINTS.append([top, bottom, left, right])

        # return the per-coordinate average of the window
        mean = [int(sum(col) / len(col)) for col in zip(*POINTS)]
        return mean
    
    
    # Initialize a face cascade using the frontal face haar cascade provided with
    # the OpenCV library
    faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    
    
    def detectAndTrackLargestFace():
        # Capture video frames
        capture = cv2.VideoCapture(0)
        roi, lastroi = [], []
    
        # Create opencv named window
        cv2.namedWindow("Follow Face", cv2.WINDOW_NORMAL)
    
        # Start the window thread for the window we are using
        cv2.startWindowThread()
    
        # Create the tracker we will use
        tracker = dlib.correlation_tracker()
    
        # The variable we use to keep track of the fact whether we are
        # currently using the dlib tracker
        trackingFace = 0
    
        # Variable to track frame count so we can refresh landmark detection
        frames = 0
    
        try:
            while True:
                # Retrieve the latest image from the webcam
                rc, img = capture.read()
                if not rc:
                    # camera read failed; skip this iteration
                    continue
    
                # Check for key presses: Q quits, R resets the
                # detection so the face is reacquired
                pressedKey = cv2.waitKey(2)
                if pressedKey == ord('Q'):
                    break
                elif pressedKey == ord('R'):
                    roi = []
                    trackingFace = 0
    
                # If we are not tracking a face, then try to detect one
                if not trackingFace:
                    # convert the img to gray-based image
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    # find all faces
                    faces = faceCascade.detectMultiScale(
                        gray, minNeighbors=10, minSize=(50, 50), maxSize=(300, 300))
    
                    # track the largest detected face by area
                    maxArea = 0
                    x, y, w, h = 0, 0, 0, 0
    
                    # Keep the face with the largest area, converting
                    # its coordinates to int for the dlib tracker.
                    for (_x, _y, _w, _h) in faces:
                        if _w*_h > maxArea:
                            x = int(_x)
                            y = int(_y)
                            w = int(_w)
                            h = int(_h)
                            maxArea = w*h
    
                    # If one or more faces are found, initialize the tracker
                    # on the largest face
                    if maxArea > 0:
                        # Initialize the tracker
                        tracker.start_track(img,
                                            dlib.rectangle(x-10,
                                                           y-20,
                                                           x+w+10,
                                                           y+h+20))
    
                        # Set the indicator variable when actively tracking region in the image
                        trackingFace = 1
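                    # brief pause after a detection pass to keep CPU usage down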
                    time.sleep(0.06)
    
                # Check if the tracker is actively tracking a region in the image
                if trackingFace:
    
                    # Update the tracker and request quality of the tracking update
                    trackingQuality = tracker.update(img)
    
                    # determine the updated position of the tracked region and crop
                    if trackingQuality >= 8.75:
                        tracked_position = tracker.get_position()
    
                        t_x = int(tracked_position.left())
                        t_y = int(tracked_position.top())
                        t_w = int(tracked_position.width())
                        t_h = int(tracked_position.height())
                        roi = t_y, t_y+t_h, t_x, t_x+t_w
                        if (roi[0] > 0):
                            roi = followFaceSmoothing(roi)
                            lastroi = roi
    
                    else:
                        trackingFace = 0
    
                    # reset every 60 frames to refresh face tracking
                    frames += 1
                    if (frames > 59):
                        frames = 0
                        roi = []
                        trackingFace = 0
    
                # show the image on the screen
                if img.any():
                    if (len(lastroi) > 0):
                        img = img[lastroi[0]:lastroi[1], lastroi[2]:lastroi[3]]
                    cv2.imshow("Follow Face", img)
                    cv2.resizeWindow("Follow Face", 320, 320)
        except (KeyboardInterrupt, SystemExit):
            pass
        finally:
            # always release the camera and close windows on exit
            capture.release()
            cv2.destroyAllWindows()
    
    
    if __name__ == '__main__':
        detectAndTrackLargestFace()
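
    One note on the cascade path: as written, the script expects haarcascade_frontalface_default.xml to sit next to it. If it does not, the copy bundled with opencv-python can be loaded instead (a minimal sketch, assuming a standard pip install of opencv-python):

    import cv2

    # cv2.data.haarcascades is the directory of the cascade files that
    # ship with the opencv-python package
    faceCascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    assert not faceCascade.empty(), 'failed to load the face cascade'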