Tags: python, opencv, pose-estimation, opencv-solvepnp, pose

Estimating the pose of a rectangular object is unstable


I'm using the following code to find the rotation and translation vectors of a rectangular object. The height and width of the rectangle are 33 cm and 44 cm respectively, so I create the object points like this:

width = 44
height = 33

objPoints = np.array(
    [(0, 0, 0), (width * 0.01, 0, 0), (width * 0.001, -(height* 0.001) , 0), (0, -(height* 0.001), 0)]
)

I use the following code to determine the rotation and translation vectors:

def findPose(imagePoints):

    (success, rotation_vector, translation_vector) = cv2.solvePnP(objPoints, imagePoints, camera_matrix,
                                                                  dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
    print("Rotation Vector:\n {0}".format(rotation_vector))
    print("Translation Vector:\n {0}".format(translation_vector))
    (end_point2D, jacobian) = cv2.projectPoints(np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                                                     translation_vector, camera_matrix, dist_coeffs)

For some reason, the results are always wrong. Am I creating the object points properly?


Solution

  • My guess is that you meant to convert the points from cm to meters, but scaled the last two points by 0.001 (a mm-to-m conversion) instead of 0.01.

    I think you meant to use:

    objPoints = np.array(
        [(0, 0, 0), (width * 0.01, 0, 0), (width * 0.01, -(height* 0.01) , 0), (0, -(height* 0.01), 0)]
    )
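    A quick numeric check makes the bug visible: with the mixed 0.01/0.001 scaling, the four points do not even form a 0.44 m × 0.33 m rectangle. A small standalone numpy sketch (the variable names `bad`/`good` are mine):

```python
import numpy as np

width = 44   # cm
height = 33  # cm

# Buggy points: the last two corners are scaled by 0.001 (mm -> m) by mistake
bad = np.array(
    [(0, 0, 0), (width * 0.01, 0, 0), (width * 0.001, -(height * 0.001), 0), (0, -(height * 0.001), 0)]
)

# Corrected points: every corner scaled by 0.01 (cm -> m)
good = np.array(
    [(0, 0, 0), (width * 0.01, 0, 0), (width * 0.01, -(height * 0.01), 0), (0, -(height * 0.01), 0)]
)

# The corrected rectangle has the expected 0.44 m and 0.33 m side lengths
print(np.linalg.norm(good[1] - good[0]), np.linalg.norm(good[2] - good[1]))

# The buggy "rectangle" has a second side of roughly 0.40 m instead of 0.33 m
print(np.linalg.norm(bad[2] - bad[1]))
```

    solvePnP then receives a model whose shape does not match the real object, so the recovered pose is necessarily wrong.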
    

    I am not a photogrammetry expert, but the solution should be scale invariant: scaling all the object coordinates by the same factor leaves the rotation unchanged and only scales the translation vector by that factor, so you could also keep the coordinates in cm and interpret the translation in cm (I am not 100% sure).
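    That intuition can be checked with a plain-numpy pinhole projection (the intrinsics and pose below are made-up values, not from the question): scaling the object points and the translation by the same factor produces identical pixel coordinates, so solvePnP sees the same problem either way and the chosen unit only shows up in the translation.

```python
import numpy as np

def project(points, K, R, t):
    # Pinhole model: x = K * (R * X + t), then divide by depth
    cam = (R @ points.T).T + t
    uv = (K @ cam.T).T
    return uv[:, :2] / uv[:, 2:3]

# Hypothetical intrinsics and pose, just for the demonstration
K = np.array([[800.0, 0.0, 320.0],
              [0.0, 800.0, 240.0],
              [0.0, 0.0, 1.0]])
R = np.eye(3)
t = np.array([0.1, -0.05, 2.0])   # rectangle 2 m in front of the camera

# Rectangle corners in meters
obj = np.array([(0, 0, 0), (0.44, 0, 0), (0.44, -0.33, 0), (0, -0.33, 0)])

s = 100.0                         # meters -> centimeters
uv_m = project(obj, K, R, t)
uv_cm = project(obj * s, K, R, t * s)

print(np.allclose(uv_m, uv_cm))  # identical projections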


    I used the code sample from Head Pose Estimation using OpenCV and Dlib.

    I put the coordinates (after scaling by 0.01 instead of 0.001) into a MATLAB 3D plot and rotated the plot to be close to the head pose in the example.

    Here is the code:

    import numpy as np
    import cv2
    
    width = 44
    height = 33
    
    objPoints = np.array(
        #[(0, 0, 0), (width * 0.01, 0, 0), (width * 0.001, -(height* 0.001) , 0), (0, -(height* 0.001), 0)]
        [(0, 0, 0), (width * 0.01, 0, 0), (width * 0.01, -(height* 0.01) , 0), (0, -(height* 0.01), 0)]
    )
    
    # Read Image
    im = cv2.imread("img.png")
    size = im.shape
    
    # 2D image points. If you change the image, you need to change this vector.
    # https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
    #image_points = np.array([
    #                            (359, 391),     # Nose tip
    #                            (399, 561),     # Chin
    #                            (337, 297),     # Left eye left corner
    #                            (513, 301),     # Right eye right corne
    #                            (345, 465),     # Left Mouth corner
    #                            (453, 469)      # Right mouth corner
    #                        ], dtype="double")
    
    image_points = np.array([
                                (273, 100),
                                (478, 182),
                                (313, 275),
                                (107, 190)
                            ], dtype="double")
    
    
    # 3D model points (from WEB sample).
    # https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
    #objPoints = np.array([
    #                            (0.0, 0.0, 0.0),             # Nose tip
    #                            (0.0, -330.0, -65.0),        # Chin
    #                            (-225.0, 170.0, -135.0),     # Left eye left corner
    #                            (225.0, 170.0, -135.0),      # Right eye right corne
    #                            (-150.0, -150.0, -125.0),    # Left Mouth corner
    #                            (150.0, -150.0, -125.0)      # Right mouth corner
    #                        ])
    
    
    # Camera internals
    
    focal_length = size[1]
    center = (size[1]/2, size[0]/2)
    camera_matrix = np.array(
                             [[focal_length, 0, center[0]],
                             [0, focal_length, center[1]],
                             [0, 0, 1]], dtype = "double"
                             )
    
    
    
    def findPose(imagePoints):
        dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
    
        (success, rotation_vector, translation_vector) = cv2.solvePnP(objPoints, imagePoints, camera_matrix,
                                                                      dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
        print("Rotation Vector:\n {0}".format(rotation_vector))
        print("Translation Vector:\n {0}".format(translation_vector))
        (end_point2D, jacobian) = cv2.projectPoints(np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                                                         translation_vector, camera_matrix, dist_coeffs)
    
        # Project the 3D point (0, 0, 1000.0) onto the image plane.
        # We use this to draw a line sticking out of the first corner of the rectangle.
        for p in image_points:
            cv2.circle(im, (int(p[0]), int(p[1])), 5, (255,0,0), -1)
    
        p1 = ( int(image_points[0][0]), int(image_points[0][1]))
        p2 = ( int(end_point2D[0][0][0]), int(end_point2D[0][0][1]))
    
        cv2.line(im, p1, p2, (0,255,0), 3)
    
    
    findPose(image_points)
    
    
    # Display image
    cv2.imshow("Output", im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    

    Result:
    (result image)
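    For interpreting the output: the rotation_vector that solvePnP returns is in axis-angle (Rodrigues) form, and cv2.Rodrigues(rotation_vector) converts it to a 3×3 rotation matrix. A numpy-only sketch of that conversion (so it runs without OpenCV; the function name is mine):

```python
import numpy as np

def rodrigues(rvec):
    # Axis-angle -> 3x3 rotation matrix (what cv2.Rodrigues computes)
    theta = np.linalg.norm(rvec)
    if theta < 1e-12:
        return np.eye(3)
    k = np.asarray(rvec, dtype=float).reshape(3) / theta
    K = np.array([[0.0, -k[2], k[1]],
                  [k[2], 0.0, -k[0]],
                  [-k[1], k[0], 0.0]])
    return np.eye(3) + np.sin(theta) * K + (1.0 - np.cos(theta)) * (K @ K)

# Example: 90 degrees about the z-axis maps the x-axis onto the y-axis
R = rodrigues(np.array([0.0, 0.0, np.pi / 2]))
print(np.round(R, 3))
```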

    Your post is missing information, so I can't really say whether the solution is correct or not.