python, opencv, computer-vision, openpose

How to detect human hand pose using OpenPose or any other alternative in Python and OpenCV?


I am trying to detect human hand pose using OpenPose, as shown for the hands in this demo: https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/.github/media/pose_face_hands.gif. I have downloaded the Caffe model and the prototxt file. Below is the code I use to run the model.

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

frame = cv2.imread("6.jpg")
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
plt.imshow(frame_rgb)

threshold = 0.025
input_width, input_height = 368, 368

nPoints = 22
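# 21 hand keypoints (0 = wrist, then 4 joints per finger) plus one background
# map make up the 22 outputs; POSE_PAIRS links the wrist to each finger chain.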
POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
              [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17],
              [17, 18], [18, 19], [19, 20]]


net = cv2.dnn.readNetFromCaffe('pose_deploy_hand.prototxt', 'pose_iter_102000.caffemodel')

net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
origin_h, origin_w = frame_rgb.shape[:2]
blob = cv2.dnn.blobFromImage(frame_rgb, 1.0 / 255, (input_width, input_height), 0, swapRB=False, crop=False)
net.setInput(blob)
detections = net.forward()

H = detections.shape[2]
W = detections.shape[3]

points = []

for i in range(nPoints):
    probability_map = detections[0, i, :, :]
    min_value, confidence, min_loc, point = cv2.minMaxLoc(probability_map)
    # Scale the keypoint from the network's output map back to image coordinates
    x = int(origin_w * (point[0] / W))
    y = int(origin_h * (point[1] / H))
    if confidence > threshold:
        cv2.circle(frame_rgb, (x, y), 6, (255, 255, 0), -1, cv2.FILLED)
        #cv2.putText(frame_rgb, "{}".format(i), (x, y-15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1, cv2.LINE_AA)
        points.append((x, y))
    else:
        points.append(None)
        
for pair in POSE_PAIRS:
    A, B = pair[0], pair[1]
    if points[A] and points[B]:
        cv2.line(frame_rgb, points[A], points[B], (0, 255, 255), 3, cv2.LINE_AA)        
        

plt.figure(figsize=(20,20))
plt.imshow(frame_rgb)

Test image: [image]

Output image: [image]

I have tried different images too, but the output is still far from what I expect.

Can you please suggest what modifications I need to make, or any alternative approach in Python and OpenCV, for detecting the hand alone or together with the full body? Thanks in advance for your suggestions.


Solution

  • Try the code below. Unlike your version, it derives the network input width from the image's aspect ratio, feeds the original BGR frame to the network (Caffe models loaded through OpenCV generally expect BGR channel order, so converting to RGB first can throw the detections off), resizes each confidence map back to frame size before locating its maximum, and uses a higher confidence threshold.

    import cv2
    import time
    import numpy as np
    
    
    protoFile = "hand/pose_deploy.prototxt"
    weightsFile = "hand/pose_iter_102000.caffemodel"
    nPoints = 22
    POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
    
    threshold = 0.2
    
    
    video_file = "videoMis.mp4"
    cap = cv2.VideoCapture(video_file)
    hasFrame, frame = cap.read()
    
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    
    aspect_ratio = frameWidth/frameHeight
    
    inHeight = 368
    # Round the input width down to a multiple of 8 while keeping the aspect ratio
    inWidth = int((aspect_ratio * inHeight) // 8) * 8
    
    vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0]))
    
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    k = 0
    while True:
        k += 1
        t = time.time()
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv2.waitKey()
            break
        frameCopy = np.copy(frame)
    
        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                  (0, 0, 0), swapRB=False, crop=False)
    
        net.setInput(inpBlob)
    
        output = net.forward()
    
        print("forward = {}".format(time.time() - t))
    
        # Empty list to store the detected keypoints
        points = []
    
        for i in range(nPoints):
            # confidence map of corresponding body's part.
            probMap = output[0, i, :, :]
            probMap = cv2.resize(probMap, (frameWidth, frameHeight))
    
            # Find global maxima of the probMap.
            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
    
            if prob > threshold :
                cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)
    
                # Add the point to the list if the probability is greater than the threshold
                points.append((int(point[0]), int(point[1])))
            else :
                points.append(None)
    
        # Draw Skeleton
        for pair in POSE_PAIRS:
            partA = pair[0]
            partB = pair[1]
    
            if points[partA] and points[partB]:
                cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
                cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
    
        print("Time Taken for frame = {}".format(time.time() - t))
    
        # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
        # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
        cv2.imshow('Output-Skeleton', frame)
        # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame)
        key = cv2.waitKey(1)
        if key == 27:
            break
    
        print("total = {}".format(time.time() - t))
    
        vid_writer.write(frame)
    
    vid_writer.release()
    cap.release()
    cv2.destroyAllWindows()
    
    

    Code credits: https://learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/
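
    If you don't have the model files yet, they can be fetched from the OpenPose project: the prototxt ships in the repository and the weights are hosted on the CMU server that OpenPose's getModels.sh uses. A minimal download sketch is below; the exact URLs are an assumption and may change, so verify them first:

    import os
    import urllib.request

    # Assumed model locations, mirroring OpenPose's repo and getModels.sh;
    # verify these URLs still resolve before relying on them.
    PROTO_URL = ("https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/"
                 "openpose/master/models/hand/pose_deploy.prototxt")
    WEIGHTS_URL = ("http://posefs1.perception.cs.cmu.edu/OpenPose/models/"
                   "hand/pose_iter_102000.caffemodel")

    os.makedirs("hand", exist_ok=True)
    for url, path in [(PROTO_URL, "hand/pose_deploy.prototxt"),
                      (WEIGHTS_URL, "hand/pose_iter_102000.caffemodel")]:
        if not os.path.exists(path):
            print("Downloading", url)
            urllib.request.urlretrieve(url, path)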

    For a single image, you can try the code below

    from __future__ import division
    import cv2
    import time
    import numpy as np
    
    protoFile = "hand/pose_deploy.prototxt"
    weightsFile = "hand/pose_iter_102000.caffemodel"
    nPoints = 22
    POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    
    frame = cv2.imread("image.jpg")
    frameCopy = np.copy(frame)
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth/frameHeight
    
    threshold = 0.1
    
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    # Round the input width down to a multiple of 8 while keeping the aspect ratio
    inWidth = int((aspect_ratio * inHeight) // 8) * 8
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    
    net.setInput(inpBlob)
    
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))
    
    # Empty list to store the detected keypoints
    points = []
    
    for i in range(nPoints):
        # confidence map of corresponding body's part.
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
    
        # Find global maxima of the probMap.
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
    
        if prob > threshold :
            cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
    
            # Add the point to the list if the probability is greater than the threshold
            points.append((int(point[0]), int(point[1])))
        else :
            points.append(None)
    
    # Draw Skeleton
    for pair in POSE_PAIRS:
        partA = pair[0]
        partB = pair[1]
    
        if points[partA] and points[partB]:
            cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2)
            cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
            cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
    
    
    cv2.imshow('Output-Keypoints', frameCopy)
    cv2.imshow('Output-Skeleton', frame)
    
    
    cv2.imwrite('Output-Keypoints.jpg', frameCopy)
    cv2.imwrite('Output-Skeleton.jpg', frame)
    
    print("Total time taken : {:.3f}".format(time.time() - t))
    
    cv2.waitKey(0)
    cv2.destroyAllWindows()
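
    One final note: the OpenPose hand network is trained on crops in which the hand dominates the image (the full OpenPose pipeline first derives a hand rectangle from the body keypoints), so running it on a full-body photo tends to give poor keypoints. A rough sketch of cropping a region of interest first is below, reusing net, nPoints and threshold from the single-image example; the bounding box values are hypothetical and would in practice come from a detector or an interactive cv2.selectROI call:

    frame = cv2.imread("image.jpg")

    # Hypothetical hand bounding box (x, y, w, h); replace with the output of a
    # hand detector or cv2.selectROI, not hard-coded values.
    x, y, w, h = 200, 150, 250, 250
    hand_crop = frame[y:y + h, x:x + w]

    inpBlob = cv2.dnn.blobFromImage(hand_crop, 1.0 / 255, (368, 368),
                                    (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()

    points = []
    for i in range(nPoints):
        probMap = output[0, i, :, :]
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob > threshold:
            # Map from the network's output map back to full-frame coordinates
            px = x + int(w * point[0] / probMap.shape[1])
            py = y + int(h * point[1] / probMap.shape[0])
            points.append((px, py))
        else:
            points.append(None)

    The skeleton can then be drawn with the same POSE_PAIRS loop as above.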