I am trying to detect human hand pose with OpenPose, as shown in the hand part of this demo: https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/.github/media/pose_face_hands.gif. I have downloaded the Caffe model and the prototxt file. Below is my code implementing the model.
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
frame = cv2.imread("6.jpg")
frame_rgb=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
plt.imshow(frame_rgb)
threshold = 0.025
input_width, input_height = 368, 368
nPoints = 22
POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
[9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17],
[17, 18], [18, 19], [19, 20]]
net = cv2.dnn.readNetFromCaffe('pose_deploy_hand.prototxt', 'pose_iter_102000.caffemodel')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
origin_h, origin_w = frame_rgb.shape[:2]
blob = cv2.dnn.blobFromImage(frame_rgb, 1.0 / 255, (input_width, input_height), 0, swapRB=False, crop=False)
net.setInput(blob)
detections = net.forward()
H = detections.shape[2]
W = detections.shape[3]
points = []
for i in range(nPoints):
    probability_map = detections[0, i, :, :]
    # Find the global maximum of the confidence map for keypoint i.
    min_value, confidence, min_loc, point = cv2.minMaxLoc(probability_map)
    # Scale the location from network-output coordinates back to the image.
    x = int(origin_w * (point[0] / W))
    y = int(origin_h * (point[1] / H))
    if confidence > threshold:
        cv2.circle(frame_rgb, (x, y), 6, (255, 255, 0), -1, cv2.FILLED)
        # cv2.putText(frame_rgb, "{}".format(i), (x, y - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1, cv2.LINE_AA)
        points.append((x, y))
    else:
        points.append(None)
for pair in POSE_PAIRS:
    A, B = pair[0], pair[1]
    if points[A] and points[B]:
        cv2.line(frame_rgb, points[A], points[B], (0, 255, 255), 3, cv2.LINE_AA)
plt.figure(figsize=(20,20))
plt.imshow(frame_rgb)
I have tried different images as well, but the output is still far from the desired result.
Can you please suggest the modifications I need to make, or any alternative approach in Python and OpenCV for detecting the hand alone or together with the full body? Thanks in advance for your suggestions.
Try the code below. Compared with your version, it makes three changes: the network input width is derived from the frame's aspect ratio instead of forcing a square 368x368 input (which distorts the hand), each confidence map is resized back to the full frame size before locating its maximum, and a higher confidence threshold (0.2 instead of 0.025) filters out weak detections.
import cv2
import time
import numpy as np
protoFile = "hand/pose_deploy.prototxt"
weightsFile = "hand/pose_iter_102000.caffemodel"
nPoints = 22
POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
threshold = 0.2
video_file = "videoMis.mp4"
cap = cv2.VideoCapture(video_file)
hasFrame, frame = cap.read()
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
aspect_ratio = frameWidth/frameHeight
inHeight = 368
inWidth = int(((aspect_ratio*inHeight)*8)//8)
vid_writer = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 15, (frame.shape[1],frame.shape[0]))
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
k = 0
while 1:
    k += 1
    t = time.time()
    hasFrame, frame = cap.read()
    if not hasFrame:
        cv2.waitKey()
        break
    frameCopy = np.copy(frame)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                    (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    print("forward = {}".format(time.time() - t))
    # Empty list to store the detected keypoints
    points = []
    for i in range(nPoints):
        # Confidence map of the corresponding keypoint.
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # Find the global maximum of the probMap.
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
        if prob > threshold:
            cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)
            # Add the point to the list if the probability is greater than the threshold
            points.append((int(point[0]), int(point[1])))
        else:
            points.append(None)
    # Draw the skeleton by connecting detected keypoint pairs.
    for pair in POSE_PAIRS:
        partA = pair[0]
        partB = pair[1]
        if points[partA] and points[partB]:
            cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
            cv2.circle(frame, points[partA], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
            cv2.circle(frame, points[partB], 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
    print("Time Taken for frame = {}".format(time.time() - t))
    # cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)
    # cv2.putText(frame, "Hand Pose using OpenCV", (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 50, 0), 2, lineType=cv2.LINE_AA)
    cv2.imshow('Output-Skeleton', frame)
    # cv2.imwrite("video_output/{:03d}.jpg".format(k), frame)
    key = cv2.waitKey(1)
    if key == 27:
        break
    print("total = {}".format(time.time() - t))
    vid_writer.write(frame)
vid_writer.release()
Code credits: https://learnopencv.com/hand-keypoint-detection-using-deep-learning-and-opencv/
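If you would rather patch your own script than replace it, the key change is in the preprocessing. A minimal sketch using the variable names from your question (frame, net, origin_w, origin_h): derive the input width from the image's aspect ratio, feed the original BGR frame, and resize each confidence map back to the original image size before calling cv2.minMaxLoc, exactly as the code above does.
# Sketch only: assumes frame, net, origin_w, origin_h, nPoints and
# threshold are defined as in the question.
aspect_ratio = origin_w / origin_h
input_height = 368
input_width = int(((aspect_ratio * input_height) * 8) // 8)
# Feed the BGR frame; use the RGB copy only for display.
blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (input_width, input_height),
                             (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
detections = net.forward()
points = []
for i in range(nPoints):
    # Resizing the map to the image size removes the manual x/y rescaling.
    probability_map = cv2.resize(detections[0, i, :, :], (origin_w, origin_h))
    min_value, confidence, min_loc, point = cv2.minMaxLoc(probability_map)
    points.append((int(point[0]), int(point[1])) if confidence > threshold else None)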
For a single image, you can try the code below:
from __future__ import division
import cv2
import time
import numpy as np
protoFile = "hand/pose_deploy.prototxt"
weightsFile = "hand/pose_iter_102000.caffemodel"
nPoints = 22
POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
frame = cv2.imread("image.jpg")
frameCopy = np.copy(frame)
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
aspect_ratio = frameWidth/frameHeight
threshold = 0.1
t = time.time()
# input image dimensions for the network
inHeight = 368
inWidth = int(((aspect_ratio*inHeight)*8)//8)
inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
net.setInput(inpBlob)
output = net.forward()
print("time taken by network : {:.3f}".format(time.time() - t))
# Empty list to store the detected keypoints
points = []
for i in range(nPoints):
    # Confidence map of the corresponding keypoint.
    probMap = output[0, i, :, :]
    probMap = cv2.resize(probMap, (frameWidth, frameHeight))
    # Find the global maximum of the probMap.
    minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
    if prob > threshold:
        cv2.circle(frameCopy, (int(point[0]), int(point[1])), 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
        # Add the point to the list if the probability is greater than the threshold
        points.append((int(point[0]), int(point[1])))
    else:
        points.append(None)
# Draw the skeleton by connecting detected keypoint pairs.
for pair in POSE_PAIRS:
    partA = pair[0]
    partB = pair[1]
    if points[partA] and points[partB]:
        cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2)
        cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
        cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
cv2.imshow('Output-Keypoints', frameCopy)
cv2.imshow('Output-Skeleton', frame)
cv2.imwrite('Output-Keypoints.jpg', frameCopy)
cv2.imwrite('Output-Skeleton.jpg', frame)
print("Total time taken : {:.3f}".format(time.time() - t))
cv2.waitKey(0)
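A further caveat, not from the original article: in the full OpenPose pipeline the hand network is run on a cropped region around each hand (located from the body keypoints), so it works best when the hand fills most of the input. If your images are full-body shots, cropping a region around the hand first and mapping the keypoints back should improve results noticeably. A rough sketch with a hypothetical, hand-picked ROI:
# Hypothetical ROI; in practice you would get this from a hand/palm
# detector or from the wrist keypoint of a body-pose model.
x0, y0, size = 100, 50, 300
hand_crop = frame[y0:y0 + size, x0:x0 + size]
blob = cv2.dnn.blobFromImage(hand_crop, 1.0 / 255, (368, 368),
                             (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
output = net.forward()
# Example for keypoint 0 (wrist); repeat for the other maps.
probMap = cv2.resize(output[0, 0, :, :], (size, size))
minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
# Map the keypoint from crop coordinates back to the full image.
full_point = (x0 + int(point[0]), y0 + int(point[1]))
And since you asked about alternatives: if you need something faster or more robust than this Caffe model, MediaPipe Hands (pip install mediapipe) is a popular Python option that returns 21 landmarks per detected hand out of the box.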