I have two cameras and a coordinate system defined by stickers placed on my floor. I'm attempting to use an OpenCV stereo camera setup to calibrate each camera and then find the world millimeter location of a feature that is present in both cameras' images. Below are the images of the coordinate system and the feature to find, taken by each camera.
There is also a very small red sticker, which is the feature whose world millimeter location I'm trying to find. It is located in camera 1's image at pixel location (1175, 440) and in camera 2's image at pixel location (1401, 298).
My basic approach is to call cv::calibrateCamera for each camera, then cv::stereoCalibrate, then cv::stereoRectify, and finally cv::triangulatePoints to get the world millimeter location of the feature. I'm getting an answer, but it seems to be nonsense. Below is the code I'm using.
// The four real world calibration points, expressed in millimeters
// The yellow circle is the origin
// The green circle is on the x-axis at 500mm away from the origin
// The red circle is on the y-axis at 500mm away from the origin
// The fourth calibration point is a small yellow sticker at (151mm, 194mm, 0mm)
// The z-axis is up, perpendicular to the floor
std::vector<cv::Point3f> _ObjectPointsWorldMM;
_ObjectPointsWorldMM.push_back(cv::Point3f(0.0f, 0.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(500.0f, 0.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(0.0f, 500.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(151.0f, 194.0f, 0.0f));
std::vector<std::vector<cv::Point3f>> ObjectPointsWorldMM;
ObjectPointsWorldMM.push_back(_ObjectPointsWorldMM);
//
// Camera 1 calibrateCamera()
//
// Get the camera 1 image
cv::Mat mCamera1Image = cv::imread("Camera1.jpg", cv::IMREAD_COLOR);
// The pixel locations in the camera 1 image that correspond to the four calibration points described above
std::vector<cv::Point2f> _Camera1ImagePointsPx;
_Camera1ImagePointsPx.push_back(cv::Point2f(791.0f, 220.0f)); // Corresponds to yellow origin sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(864.0f, 643.0f)); // Corresponds to green x=500mm sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(1277.0f, 113.0f)); // Corresponds to red y=500mm sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(1010.0f, 287.0f)); // Corresponds to small yellow sticker at fourth calibration point (see above)
std::vector<std::vector<cv::Point2f>> Camera1ImagePointsPx;
Camera1ImagePointsPx.push_back(_Camera1ImagePointsPx);
// Calibrate camera 1
cv::Mat mCamera1IntrinsicMatrix;
cv::Mat mCamera1DistortionCoefficients;
std::vector<cv::Mat> Camera1RotationVecs;
std::vector<cv::Mat> Camera1TranslationVecs;
const double dCamera1RMSReProjectionError = cv::calibrateCamera(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera1ImagePointsPx, // In: The pixel locations in the camera 1 image that correspond to the four calibration points
mCamera1Image.size(), // In: The size of the camera 1 calibration image
mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients, // Out: The camera intrinsic matrix and distortion coefficients
Camera1RotationVecs, Camera1TranslationVecs // Out: The camera rotation and translation vectors
);
//
// Camera 2 calibrateCamera()
//
// Get the camera 2 image
cv::Mat mCamera2Image = cv::imread("Camera2.jpg", cv::IMREAD_COLOR);
// Check assumptions
assert((mCamera1Image.size() == mCamera2Image.size()));
// The pixel locations in the camera 2 image that correspond to the four calibration points described above
std::vector<cv::Point2f> _Camera2ImagePointsPx;
_Camera2ImagePointsPx.push_back(cv::Point2f(899.0f, 439.0f)); // Corresponds to yellow origin sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1472.0f, 608.0f)); // Corresponds to green x=500mm sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1101.0f, 74.0f)); // Corresponds to red y=500mm sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1136.0f, 322.0f)); // Corresponds to small yellow sticker at fourth calibration point (see above)
std::vector<std::vector<cv::Point2f>> Camera2ImagePointsPx;
Camera2ImagePointsPx.push_back(_Camera2ImagePointsPx);
// Calibrate camera 2
cv::Mat mCamera2IntrinsicMatrix;
cv::Mat mCamera2DistortionCoefficients;
std::vector<cv::Mat> Camera2RotationVecs;
std::vector<cv::Mat> Camera2TranslationVecs;
const double dCamera2RMSReProjectionError = cv::calibrateCamera(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera2ImagePointsPx, // In: The pixel locations in the camera 2 image that correspond to the four calibration points
mCamera2Image.size(), // In: The size of the camera 2 calibration image
mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients, // Out: The camera intrinsic matrix and distortion coefficients
Camera2RotationVecs, Camera2TranslationVecs // Out: The camera rotation and translation vectors
);
//
// stereoCalibrate()
//
// Calibrate the stereo camera setup
cv::Mat InterCameraRotationMatrix, InterCameraTranslationMatrix;
cv::Mat InterCameraEssentialMatrix, InterCameraFundamentalMatrix;
const double dStereoCalReProjectionError = cv::stereoCalibrate(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera1ImagePointsPx, // In: The pixel locations in the camera 1 image that correspond to the four calibration points
Camera2ImagePointsPx, // In: The pixel locations in the camera 2 image that correspond to the four calibration points
mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients, // In: Camera 1's intrinsic matrix and distortion coefficients
mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients, // In: Camera 2's intrinsic matrix and distortion coefficients
mCamera1Image.size(), // In: The size of each image
InterCameraRotationMatrix, InterCameraTranslationMatrix, // Out: The inter-camera rotation matrix and translation vector
InterCameraEssentialMatrix, InterCameraFundamentalMatrix // Out: The inter-camera essential and fundamental matrices
);
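// Sanity check: print the RMS re-projection errors returned by the calls above.
// Note: a single view of only four coplanar points constrains the intrinsics
// very weakly, so small errors here do not guarantee good camera matrices.
printf("Camera 1 RMS re-projection error: %f\n", dCamera1RMSReProjectionError);
printf("Camera 2 RMS re-projection error: %f\n", dCamera2RMSReProjectionError);
printf("Stereo calibration re-projection error: %f\n", dStereoCalReProjectionError);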
//
// stereoRectify()
//
// Compute rectification transforms for each head of the calibrated stereo camera
cv::Mat Camera1RectificationTransform, Camera2RectificationTransform;
cv::Mat Camera1ProjectionMatrix, Camera2ProjectionMatrix;
cv::Mat DisparityToDepthMappingMatrix;
cv::stereoRectify(
mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients, // In: Camera 1's intrinsic matrix and distortion coefficients
mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients, // In: Camera 2's intrinsic matrix and distortion coefficients
mCamera1Image.size(), // In: The size of each image
InterCameraRotationMatrix, InterCameraTranslationMatrix, // In: The inter-camera rotation matrix and translation vector
Camera1RectificationTransform, Camera2RectificationTransform, // Out: The 3x3 rectification transforms for each camera
Camera1ProjectionMatrix, Camera2ProjectionMatrix, // Out: The 3x4 projection matrices for each camera
DisparityToDepthMappingMatrix // Out: The 4x4 disparity-to-depth mapping matrix
);
//
// triangulatePoints()
//
// The pixel location of each feature to find in the camera 1 image
std::vector<cv::Point2f> FeaturePointsCamera1;
FeaturePointsCamera1.push_back(cv::Point2f(1175.0f, 440.0f)); // The pixel location of the small red sticker
// The pixel location of each feature to find in the camera 2 image
std::vector<cv::Point2f> FeaturePointsCamera2;
FeaturePointsCamera2.push_back(cv::Point2f(1401.0f, 298.0f)); // The pixel location of the small red sticker
// Perform triangulation to find the feature(s)
cv::Mat FeatureLocationHomog;
cv::triangulatePoints(
Camera1ProjectionMatrix, Camera2ProjectionMatrix, // In: The 3x4 projection matrices for each camera
FeaturePointsCamera1, FeaturePointsCamera2, // In: The feature pixel points for each camera
FeatureLocationHomog // Out: The reconstructed feature locations in homogeneous coordinates
);
// Check assumptions
assert((FeatureLocationHomog.cols == static_cast<int>(FeaturePointsCamera1.size())));
The homogeneous coordinates I'm getting back are:
[-0.60382962, -0.0076272688, 0.79707688, 0.00036873418]
My understanding of homogeneous coordinates is that to convert [a, b, c, d] to non-homogeneous coordinates you compute [a/d, b/d, c/d], so my point in non-homogeneous coordinates would be:
[-1637.574, -20.685, 2161.657]
which is clearly not correct. Can anyone point me in the right direction?
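For what it's worth, cv::convertPointsFromHomogeneous performs the same divide-by-w conversion, so I can cross-check that last step; a minimal check, assuming the FeatureLocationHomog matrix from above (it expects points as rows, hence the transpose):
cv::Mat FeatureCartesian;
cv::convertPointsFromHomogeneous(FeatureLocationHomog.t(), FeatureCartesian);
// FeatureCartesian now holds the same (a/d, b/d, c/d) point as the manual division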
OK, I believe I now have a better answer than my original. Hopefully it'll be of some use to someone.
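The main change from my original attempt is that I no longer use the rectified-frame projection matrices from cv::stereoRectify. Instead I build the projection matrices directly as K1 * [I|0] and K2 * [R|t], so triangulation happens in camera 1's coordinate frame, and then use cv::solvePnP to recover the world-to-camera transform, whose inverse maps the triangulated points into world millimeters.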
// The four real world calibration points, expressed in millimeters
// The yellow circle is the origin
// The green circle is on the x-axis at 500mm away from the origin
// The red circle is on the y-axis at 500mm away from the origin
// The fourth calibration point is a small yellow sticker at (151mm, 194mm, 0mm)
// The z-axis is up, perpendicular to the floor
std::vector<cv::Point3f> _ObjectPointsWorldMM;
_ObjectPointsWorldMM.push_back(cv::Point3f(0.0f, 0.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(500.0f, 0.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(0.0f, 500.0f, 0.0f));
_ObjectPointsWorldMM.push_back(cv::Point3f(151.0f, 194.0f, 0.0f));
std::vector<std::vector<cv::Point3f>> ObjectPointsWorldMM;
ObjectPointsWorldMM.push_back(_ObjectPointsWorldMM);
//
// Camera 1 calibrateCamera()
//
// Get the camera 1 image
cv::Mat mCamera1Image = cv::imread("Camera1.jpg", cv::IMREAD_COLOR);
// The pixel locations in the camera 1 image that correspond to the four calibration points described above
std::vector<cv::Point2f> _Camera1ImagePointsPx;
_Camera1ImagePointsPx.push_back(cv::Point2f(791.0f, 220.0f)); // Corresponds to yellow origin sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(864.0f, 643.0f)); // Corresponds to green x=500mm sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(1277.0f, 113.0f)); // Corresponds to red y=500mm sticker
_Camera1ImagePointsPx.push_back(cv::Point2f(1010.0f, 287.0f)); // Corresponds to small yellow sticker at fourth calibration point (see above)
std::vector<std::vector<cv::Point2f>> Camera1ImagePointsPx;
Camera1ImagePointsPx.push_back(_Camera1ImagePointsPx);
// Calibrate camera 1
cv::Mat mCamera1IntrinsicMatrix;
cv::Mat mCamera1DistortionCoefficients;
std::vector<cv::Mat> Camera1RotationVecs;
std::vector<cv::Mat> Camera1TranslationVecs;
cv::calibrateCamera(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera1ImagePointsPx, // In: The pixel locations in the camera 1 image that correspond to the four calibration points
mCamera1Image.size(), // In: The size of the camera 1 calibration image
mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients, // Out: The camera intrinsic matrix and distortion coefficients
Camera1RotationVecs, Camera1TranslationVecs // Out: The camera rotation and translation vectors
);
//
// Camera 2 calibrateCamera()
//
// Get the camera 2 image
cv::Mat mCamera2Image = cv::imread("Camera2.jpg", cv::IMREAD_COLOR);
// Check assumptions
assert((mCamera1Image.size() == mCamera2Image.size()));
// The pixel locations in the camera 2 image that correspond to the four calibration points described above
std::vector<cv::Point2f> _Camera2ImagePointsPx;
_Camera2ImagePointsPx.push_back(cv::Point2f(899.0f, 439.0f)); // Corresponds to yellow origin sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1472.0f, 608.0f)); // Corresponds to green x=500mm sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1101.0f, 74.0f)); // Corresponds to red y=500mm sticker
_Camera2ImagePointsPx.push_back(cv::Point2f(1136.0f, 322.0f)); // Corresponds to small yellow sticker at fourth calibration point (see above)
std::vector<std::vector<cv::Point2f>> Camera2ImagePointsPx;
Camera2ImagePointsPx.push_back(_Camera2ImagePointsPx);
// Calibrate camera 2
cv::Mat mCamera2IntrinsicMatrix;
cv::Mat mCamera2DistortionCoefficients;
std::vector<cv::Mat> Camera2RotationVecs;
std::vector<cv::Mat> Camera2TranslationVecs;
cv::calibrateCamera(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera2ImagePointsPx, // In: The pixel locations in the camera 2 image that correspond to the four calibration points
mCamera2Image.size(), // In: The size of the camera 2 calibration image
mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients, // Out: The camera intrinsic matrix and distortion coefficients
Camera2RotationVecs, Camera2TranslationVecs // Out: The camera rotation and translation vectors
);
//
// stereoCalibrate()
//
// Calibrate the stereo camera setup
cv::Mat InterCameraRotationMatrix, InterCameraTranslationMatrix;
cv::Mat InterCameraEssentialMatrix, InterCameraFundamentalMatrix;
cv::stereoCalibrate(
ObjectPointsWorldMM, // In: The world millimeter locations of the four calibration points
Camera1ImagePointsPx, // In: The pixel locations in the camera 1 image that correspond to the four calibration points
Camera2ImagePointsPx, // In: The pixel locations in the camera 2 image that correspond to the four calibration points
mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients, // In: Camera 1's intrinsic matrix and distortion coefficients
mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients, // In: Camera 2's intrinsic matrix and distortion coefficients
mCamera1Image.size(), // In: The size of each image
InterCameraRotationMatrix, InterCameraTranslationMatrix, // Out: The inter-camera rotation matrix and translation vector
InterCameraEssentialMatrix, InterCameraFundamentalMatrix // Out: The inter-camera essential and fundamental matrices
);
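// Note: by default stereoCalibrate runs with cv::CALIB_FIX_INTRINSIC, so the
// per-camera intrinsics computed above are held fixed and only the inter-camera
// rotation and translation are estimated here.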
//
// solvePnP() to get the final projection matrices and the R|t matrix
// that converts points from the camera frame to the world frame
//
// Build the identity R|t matrix, i.e. [I | 0]
cv::Mat IdentityRT = cv::Mat::eye(3, 4, CV_64F);
// Compute the camera 1 projection matrix
const cv::Mat Camera1ProjectionMatrix = (mCamera1IntrinsicMatrix * IdentityRT);
// Build the camera 2 R|t matrix
cv::Mat Camera2RT;
cv::hconcat(InterCameraRotationMatrix, InterCameraTranslationMatrix, Camera2RT);
// Compute the camera 2 projection matrix
const cv::Mat Camera2ProjectionMatrix = (mCamera2IntrinsicMatrix * Camera2RT);
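// With these two matrices, P1 = K1 * [I|0] and P2 = K2 * [R|t], triangulation
// takes place in camera 1's coordinate frame: camera 1 sits at the origin and
// camera 2 is positioned relative to it by the stereoCalibrate results. The
// solvePnP step below recovers the world-to-camera-1 transform, so its inverse
// maps triangulated points back into the world (floor sticker) frame.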
// Solve for the rotation and translation vectors
cv::Mat SolvedRotation, SolvedTranslation;
cv::solvePnP(
ObjectPointsWorldMM[0], // In: A vector of world object points
Camera1ImagePointsPx[0], // In: The corresponding vector of the camera's image points
mCamera1IntrinsicMatrix, // In: The corresponding camera's intrinsic matrix
mCamera1DistortionCoefficients, // In: The corresponding camera's distortion coefficients
SolvedRotation, // Out: The computed rotation vector
SolvedTranslation // Out: The computed translation vector
);
// Get the solved rotation vector as a 3x3 matrix
cv::Mat SolvedRotationMatrix;
cv::Rodrigues(SolvedRotation, SolvedRotationMatrix);
// Build the 4x4 homogeneous R|t matrix from the solved rotation and translation
cv::Mat SolvedRT = cv::Mat::eye(4, 4, CV_64F);
SolvedRotationMatrix.copyTo(SolvedRT(cv::Rect(0, 0, 3, 3))); // Top-left 3x3 block = R
SolvedTranslation.copyTo(SolvedRT(cv::Rect(3, 0, 1, 3)));    // Top three rows of the last column = t
// Invert this R|t matrix
cv::Mat SolvedRTInverted = SolvedRT.inv();
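// Equivalent closed form: a rigid transform inverts as [R^T | -R^T * t], so the
// general matrix inversion above could also be written as (a sketch, not used below):
//   cv::Mat RotInv = SolvedRotationMatrix.t();
//   cv::Mat TransInv = (-RotInv * SolvedTranslation);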
//
// triangulatePoints()
//
// The pixel location of each feature to find in the camera 1 image
std::vector<cv::Point2f> FeaturePointsCamera1;
FeaturePointsCamera1.push_back(cv::Point2f(1175.0f, 440.0f)); // The pixel location of the small red sticker
// The pixel location of each feature to find in the camera 2 image
std::vector<cv::Point2f> FeaturePointsCamera2;
FeaturePointsCamera2.push_back(cv::Point2f(1401.0f, 298.0f)); // The pixel location of the small red sticker
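// Note: the projection matrices built above ignore lens distortion, so strictly
// these raw pixel coordinates should be undistorted first. A sketch of that
// step (not applied here), keeping the results in pixel units by passing the
// intrinsic matrix back in as P:
//   std::vector<cv::Point2f> UndistortedCamera1, UndistortedCamera2;
//   cv::undistortPoints(FeaturePointsCamera1, UndistortedCamera1,
//       mCamera1IntrinsicMatrix, mCamera1DistortionCoefficients,
//       cv::noArray(), mCamera1IntrinsicMatrix);
//   cv::undistortPoints(FeaturePointsCamera2, UndistortedCamera2,
//       mCamera2IntrinsicMatrix, mCamera2DistortionCoefficients,
//       cv::noArray(), mCamera2IntrinsicMatrix);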
// Perform triangulation to find the feature(s)
cv::Mat FeatureLocationHomog;
cv::triangulatePoints(
Camera1ProjectionMatrix, Camera2ProjectionMatrix, // In: The 3x4 projection matrices for each camera
FeaturePointsCamera1, FeaturePointsCamera2, // In: The feature pixel points for each camera
FeatureLocationHomog // Out: The reconstructed feature locations in homogeneous coordinates
);
//
// Convert from the camera frame coordinates returned by triangulatePoints to the world frame of reference
//
// Check assumptions
assert((1 == FeatureLocationHomog.cols));
// Get the homogeneous coordinates
cv::Mat FeatureHomog = FeatureLocationHomog.col(0);
// Check assumptions
assert((1 == FeatureHomog.cols) && (4 == FeatureHomog.rows));
// Get the divisor for computing camera frame coordinates from the homogeneous coordinates
long double dDivisor = static_cast<long double>(FeatureHomog.at<float>(3));
// If we have a non-zero divisor
if (0.0L != dDivisor)
{
// Get the camera frame location
const long double dCameraFrameX = (static_cast<long double>(FeatureHomog.at<float>(0)) / dDivisor);
const long double dCameraFrameY = (static_cast<long double>(FeatureHomog.at<float>(1)) / dDivisor);
const long double dCameraFrameZ = (static_cast<long double>(FeatureHomog.at<float>(2)) / dDivisor);
// Pack these camera frame coordinates into a 4x1 homogeneous column vector
cv::Mat TriangulatedResultsCameraFrame(4, 1, CV_64F);
TriangulatedResultsCameraFrame.at<double>(0, 0) = static_cast<double>(dCameraFrameX);
TriangulatedResultsCameraFrame.at<double>(1, 0) = static_cast<double>(dCameraFrameY);
TriangulatedResultsCameraFrame.at<double>(2, 0) = static_cast<double>(dCameraFrameZ);
TriangulatedResultsCameraFrame.at<double>(3, 0) = 1.0;
// Compute the final world location in homogeneous coordinates
const cv::Mat FinalWorldLocation = (SolvedRTInverted * TriangulatedResultsCameraFrame);
// Check assumptions
assert((4 == FinalWorldLocation.rows));
assert((1 == FinalWorldLocation.cols));
// Get the divisor to convert from homogeneous coordinates to the final world frame
dDivisor = static_cast<long double>(FinalWorldLocation.at<double>(3, 0));
// If we have a non-zero divisor
if (0.0L != dDivisor)
{
// Compute the final answer
const long double dFinal3DLocX = (static_cast<long double>(FinalWorldLocation.at<double>(0, 0)) / dDivisor);
const long double dFinal3DLocY = (static_cast<long double>(FinalWorldLocation.at<double>(1, 0)) / dDivisor);
const long double dFinal3DLocZ = (static_cast<long double>(FinalWorldLocation.at<double>(2, 0)) / dDivisor);
printf("\n\tFinal answer: (%Lf, %Lf, %Lf)\n\n", dFinal3DLocX, dFinal3DLocY, dFinal3DLocZ);
}
}
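One way to sanity-check the whole pipeline is to feed a known calibration point back through it. The small yellow sticker at (151mm, 194mm, 0mm) appears at pixel (1010, 287) in camera 1 and at (1136, 322) in camera 2, so triangulating those two pixel locations and converting to the world frame exactly as above should reproduce approximately (151, 194, 0). A minimal sketch, reusing the matrices computed above:
std::vector<cv::Point2f> CheckPointsCamera1, CheckPointsCamera2;
CheckPointsCamera1.push_back(cv::Point2f(1010.0f, 287.0f)); // Known yellow sticker in camera 1
CheckPointsCamera2.push_back(cv::Point2f(1136.0f, 322.0f)); // Known yellow sticker in camera 2
cv::Mat CheckLocationHomog;
cv::triangulatePoints(
    Camera1ProjectionMatrix, Camera2ProjectionMatrix, // In: The same 3x4 projection matrices as above
    CheckPointsCamera1, CheckPointsCamera2,           // In: The known pixel locations of the fourth calibration point
    CheckLocationHomog                                // Out: Homogeneous coordinates in camera 1's frame
);
// Divide by w and apply SolvedRTInverted exactly as above; the result should
// land near (151, 194, 0) if the calibration and triangulation are sound.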