python opencv face-recognition imutils

cv2 capture video with minimized window


I have a small Python script that uses cv2 to capture the first face detected and display only that region in a cv2 window. Everything works great.

Currently, the video feed freezes when the window is minimized. How can I keep my script capturing video while the cv2 window is minimized to the tray?
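
To clarify what I am after, the direction I have in mind is roughly the one below: read frames on a worker thread so that capture never depends on the window state, and let the GUI loop only display the latest frame. This is a minimal sketch, not my actual script, and the names (capture_loop, latest) are just illustrative:

    import threading
    import cv2

    latest = {"frame": None}  # shared slot holding the newest frame
    running = True

    def capture_loop():
        # grab frames continuously; this thread never touches the GUI,
        # so minimizing the window should not stall the capture
        cap = cv2.VideoCapture(0)
        while running:
            ok, frame = cap.read()
            if ok:
                latest["frame"] = frame
        cap.release()

    threading.Thread(target=capture_loop, daemon=True).start()

    while True:
        if latest["frame"] is not None:
            cv2.imshow("Frame", latest["frame"])  # display only
        if cv2.waitKey(1) & 0xFF == ord("q"):
            running = False
            break
    cv2.destroyAllWindows()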

EDIT

I would also like to know if there is a better approach that would reduce the load on the CPU. Currently, running this script uses 14-20% of my CPU.
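
One idea I have been considering for the CPU question (a sketch only, assuming the face_recognition detection call is what dominates the load; DETECT_EVERY is a made-up knob) is to run detection every Nth frame and reuse the last boxes in between:

    from imutils.video import VideoStream
    import face_recognition
    import imutils
    import cv2

    DETECT_EVERY = 5  # hypothetical interval; trades CPU against latency
    vs = VideoStream(0).start()
    frame_idx, last_boxes = 0, []

    while True:
        frame = vs.read()
        if frame is None:  # stream may not be warmed up yet
            continue
        # only pay for detection on every Nth frame
        if frame_idx % DETECT_EVERY == 0:
            small = imutils.resize(frame, width=240)
            last_boxes = face_recognition.face_locations(small)
        frame_idx += 1
        # ...crop/draw using last_boxes here, as in the full script below...
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cv2.destroyAllWindows()
    vs.stop()

Here is my current script: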

from __future__ import division
from imutils.video import VideoStream
import face_recognition
import imutils
import cv2

POINTS = []


def landmarkTrackSmoothing(box, factor, maxPoints=30):
    # face_recognition returns each box as (top, right, bottom, left)
    top, right, bottom, left = box[0]

    # keep a rolling window of the last maxPoints boxes
    if len(POINTS) >= maxPoints:
        del POINTS[0]
    POINTS.append([top, right, bottom, left])

    # average the window, then divide by factor to map the downscaled
    # detection coordinates back onto the full-resolution frame
    mean = [int((sum(col) / len(col)) / factor) for col in zip(*POINTS)]
    return mean


def cartoonFilter(roi):
    # 1) Edges
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 5)
    edges = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)

    # 2) Color
    color = cv2.bilateralFilter(roi, 9, 300, 300)

    # 3) Cartoon
    return cv2.bitwise_and(color, color, mask=edges)


def OpenCamera():
    vs = VideoStream(0 + cv2.CAP_DSHOW, framerate=120).start()
    vs.stream.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    vs.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)
    roi = [0, 0, 0, 0]
    prev = [0, 0, 0, 0]

    # Add filter flags
    cartoonEffect = False

    # loop over frames from the video file stream
    while True:
        # grab the frame from the threaded video stream
        frame = vs.read()

        # downscale and convert to grayscale for fast processing
        # of landmark locations
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = imutils.resize(gray, width=240)

        # calculate upscale factor for landmark locations
        factor = float(gray.shape[1]) / frame.shape[1]

        # detect the (x, y)-coordinates of the bounding boxes
        # corresponding to each face in the input frame, then
        # the facial embeddings for each face
        boxes = face_recognition.face_locations(gray)
        box = list(map(list, boxes))

        # smooth and upscale the first detected face's coordinates
        if len(box) > 0:
            box = [landmarkTrackSmoothing(box, factor)]

        # crop the frame to the first recognized face
        if len(box) > 0:
            top, right, bottom, left = box[0]
            roi = frame[top:bottom, left:right]
            prev = top, bottom, left, right
            if cartoonEffect:
                roi = cartoonFilter(roi)

        # if no face was found this frame, fall back to the previous
        # face region (or the full frame if we never had one)
        if len(box) == 0:
            if prev[0] > 0:
                roi = frame[prev[0]:prev[1], prev[2]:prev[3]]
            else:
                roi = frame

        cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)
        if (roi.any()):
            cv2.imshow("Frame", roi)
        cv2.resizeWindow("Frame", 512, 512)

        # poll for key presses; more dynamic key commands for filters can be added here
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break
        if key == ord('c'):
            # toggle the cartoon filter
            cartoonEffect = not cartoonEffect

    # do a bit of cleanup on quit
    cv2.destroyAllWindows()
    vs.stop()


# Begin capturing
OpenCamera()

Solution

  • I managed to reduce my CPU usage to 1-10% by switching to a Haar cascade for detection plus a dlib correlation tracker, instead of what I was doing. I am still not able to keep capturing video while the window is minimized.

    Final solution:

    import cv2
    import dlib
    import time
    POINTS = []
    
    
    def followFaceSmoothing(roi, maxPoints=30):
        # roi comes in as (top, bottom, left, right)
        top, bottom, left, right = roi

        # keep a rolling window of the last maxPoints boxes
        if len(POINTS) >= maxPoints:
            del POINTS[0]
        POINTS.append([top, bottom, left, right])

        # return the per-coordinate average of the window
        mean = [int(sum(col) / len(col)) for col in zip(*POINTS)]
        return mean
    
    
    # Initialize a face cascade using the frontal face haar cascade provided with
    # the OpenCV library
    faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    
    
    def detectAndTrackLargestFace():
        # Capture video frames
        capture = cv2.VideoCapture(0)
        roi, lastroi = [], []
    
        # Create opencv named window
        cv2.namedWindow("Follow Face", cv2.WINDOW_NORMAL)
    
        # Start the window thread for the window we are using
        cv2.startWindowThread()
    
        # Create the tracker we will use
        tracker = dlib.correlation_tracker()
    
        # The variable we use to keep track of the fact whether we are
        # currently using the dlib tracker
        trackingFace = 0
    
        # Variable to track frame count so we can refresh landmark detection
        frames = 0
    
        try:
            while True:
                # Retrieve the latest image from the webcam
                rc, img = capture.read()
                if not rc:
                    # camera read failed; skip this iteration
                    continue
    
                # Check for key presses: Q quits, R resets the
                # detection so the face is reacquired
                pressedKey = cv2.waitKey(2)
                if pressedKey == ord('Q'):
                    break
                elif pressedKey == ord('R'):
                    roi = []
                    trackingFace = 0
    
                # If we are not tracking a face, then try to detect one
                if not trackingFace:
                    # convert the img to gray-based image
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    # find all faces
                    faces = faceCascade.detectMultiScale(
                        gray, minNeighbors=10, minSize=(50, 50), maxSize=(300, 300))
    
                    # track the largest detected face by area
                    maxArea = 0
                    x, y, w, h = 0, 0, 0, 0
    
                    # Keep the face with the largest area, converting
                    # its coordinates to int for the dlib tracker.
                    for (_x, _y, _w, _h) in faces:
                        if _w*_h > maxArea:
                            x = int(_x)
                            y = int(_y)
                            w = int(_w)
                            h = int(_h)
                            maxArea = w*h
    
                    # If one or more faces are found, initialize the tracker
                    # on the largest face
                    if maxArea > 0:
                        # Initialize the tracker
                        tracker.start_track(img,
                                            dlib.rectangle(x-10,
                                                           y-20,
                                                           x+w+10,
                                                           y+h+20))
    
                        # Set the indicator variable when actively tracking region in the image
                        trackingFace = 1
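                    # brief pause after a detection pass to keep CPU usage down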
                    time.sleep(0.06)
    
                # Check if the tracker is actively tracking a region in the image
                if trackingFace:
    
                    # Update the tracker and request quality of the tracking update
                    trackingQuality = tracker.update(img)
    
                    # determine the updated position of the tracked region and crop
                    if trackingQuality >= 8.75:
                        tracked_position = tracker.get_position()
    
                        t_x = int(tracked_position.left())
                        t_y = int(tracked_position.top())
                        t_w = int(tracked_position.width())
                        t_h = int(tracked_position.height())
                        roi = t_y, t_y+t_h, t_x, t_x+t_w
                        if (roi[0] > 0):
                            roi = followFaceSmoothing(roi)
                            lastroi = roi
    
                    else:
                        trackingFace = 0
    
                    # reset every 60 frames to refresh face tracking
                    frames += 1
                    if (frames > 59):
                        frames = 0
                        roi = []
                        trackingFace = 0
    
                # show the image on the screen
                if img.any():
                    if (len(lastroi) > 0):
                        img = img[lastroi[0]:lastroi[1], lastroi[2]:lastroi[3]]
                    cv2.imshow("Follow Face", img)
                    cv2.resizeWindow("Follow Face", 320, 320)
        except (KeyboardInterrupt, SystemExit):
            pass
        finally:
            # always release the camera and close windows on exit
            capture.release()
            cv2.destroyAllWindows()
    
    
    if __name__ == '__main__':
        detectAndTrackLargestFace()
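
    One note on the cascade path: as written, the script expects haarcascade_frontalface_default.xml to sit next to it. If it does not, the copy bundled with opencv-python can be loaded instead (a minimal sketch, assuming a standard pip install of opencv-python):

    import cv2

    # cv2.data.haarcascades is the directory of the cascade files that
    # ship with the opencv-python package
    faceCascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    assert not faceCascade.empty(), 'failed to load the face cascade'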