Hi Thanks for your attention, I'm trying to implement Harris Corner Detection Algorithm, and have been following the standard Algorithm given on most souces one of them being Wiki Page I follow all the procedure as per the algorithm, and I use sobel filters for computing gradient gx and gy, but the result ends up being a edge detector ( which ended up surprisingly better than the canny edge detector I implemented :O ) I wonder what I am doing wrong.
Core.h
#ifndef CORE
#define CORE
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
#endif
Utils.h
#ifndef UTILS
#define UTILS
#include "Core.h"
static void displayImage(Mat &img, unsigned int time = 0,
string title = "frame") {
imshow(title, img);
waitKey(time);
}
static bool isValidPoint(Mat &img, int x, int y) {
int rows = img.rows;
int cols = img.cols;
return (x >= 0 and x < rows and y >= 0 and y < cols);
}
static void performDiff(Mat img1, Mat img2) {
int rows = img1.rows;
int cols = img2.cols;
Mat diff(rows, cols, CV_8U);
bool zero = true;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int d = (static_cast<int>(img1.at<u_char>(i, j)) -
static_cast<int>(img2.at<u_char>(i, j)));
diff.at<u_char>(i, j) = d;
if (d != 0)
zero = false;
}
}
displayImage(diff, 0, "diff image");
}
static int getValIntOf(Mat &img, int x, int y) {
return static_cast<int>(img.at<u_char>(x, y));
}
#endif
GaussianTransform.h
#ifndef GUASSIAN_TRANSFORM
#define GUASSIAN_TRANSFORM
static vector<vector<double>> getGuassianKernal(int size, double sigma) {
vector<vector<double>> Kernal(size, vector<double>(size, 0.0));
double sum = 0.0;
int center = size / 2;
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
int x = i - center;
int y = j - center;
Kernal[i][j] = exp(-(x * x + y * y) / (2 * sigma * sigma));
sum += Kernal[i][j];
}
}
if (sum != 0) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
Kernal[i][j] /= sum;
}
}
}
return Kernal;
}
static Mat guassianFilterTransform(Mat &img, int FiltSize = 3,
double Sigma = 1.5) {
vector<vector<double>> filter = getGuassianKernal(FiltSize, Sigma);
int trows = img.rows - FiltSize + 1;
int tcols = img.cols - FiltSize + 1;
Mat transformed(trows, tcols, CV_8U);
for (int i = 1; i <= trows - 2; i++) {
for (int j = 1; j <= tcols - 2; j++) {
double tval = 0;
for (int x = -1; x <= 1; x++) {
for (int y = -1; y <= 1; y++) {
tval = tval + (filter[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
}
}
transformed.at<u_char>(i, j) = static_cast<u_char>(tval);
}
}
return (transformed);
}
#endif
Harris.cpp
#ifndef HARRIS_CORNER
#define HARRIS_CORNER
#include "../../BasicTransforms/GaussianTransform.h"
#include "../../Utils/Core/Core.h"
#include "../../Utils/Core/Utils.h"
double computeCornerResponseFunc(vector<vector<double>> &StructMat) {
double det =
StructMat[0][0] * StructMat[1][1] - StructMat[0][1] * StructMat[1][0];
double trace = StructMat[0][0] + StructMat[1][1];
return (det + (0.04) * (trace * trace));
}
Mat detectHarrisCorners(Mat &img) {
int filtSize = 3;
vector<vector<int>> filtery, filterx;
filtery = {{-1, -2, -1}, {0, 0, 0}, {1, 2, 1}};
filterx = {{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}};
int trows = img.rows - filtSize + 1;
int tcols = img.cols - filtSize + 1;
Mat gradMag(trows, tcols, CV_8U);
Mat gradDir(trows, tcols, CV_64F);
Mat result(trows, tcols, CV_8U);
double Thrs = 200;
vector<vector<double>> gaussianKernal = getGuassianKernal(3, 1.5);
for (int i = 1; i <= trows - 2; i++) {
for (int j = 1; j <= tcols - 2; j++) {
double gradX = 0;
double gradY = 0;
for (int x = -1; x <= 1; x++) {
for (int y = -1; y <= 1; y++) {
gradX =
gradX + gaussianKernal[x + 1][y + 1] *
(filterx[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
gradY =
gradY + gaussianKernal[x + 1][y + 1] *
(filtery[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
}
}
vector<vector<double>> strtMtrx(2, vector<double>(2, 0.0));
strtMtrx[0][0] = gradX * gradX;
strtMtrx[1][1] = gradY * gradY;
strtMtrx[0][1] = strtMtrx[1][0] = gradX * gradY;
double CornerResponseFunc = computeCornerResponseFunc(strtMtrx);
if (CornerResponseFunc >= Thrs) {
result.at<u_char>(i, j) = 255;
}
double tval = sqrt(gradX * gradX + gradY * gradY);
if (gradX == 0.00) {
gradDir.at<double>(i, j) = (gradY >= 0 ? M_PI / 2.0 : -M_PI / 2.0);
} else {
gradDir.at<double>(i, j) = atan(gradY / gradX);
}
gradDir.at<double>(i, j) *= (180.0 / M_PI);
gradDir.at<double>(i, j) = abs(gradDir.at<double>(i, j));
gradMag.at<u_char>(i, j) = static_cast<u_char>(tval);
}
}
for (int i = 1; i < trows - 1; i++) {
for (int j = 1; j < tcols - 1; j++) {
double angle = gradDir.at<double>(i, j);
double neigh1, neigh2;
if ((angle >= 0 and angle < 22.5) or (angle >= 157.5 && angle < 180)) {
neigh1 = gradMag.at<u_char>(i, j + 1);
neigh2 = gradMag.at<u_char>(i, j - 1);
} else if ((angle >= 22.5 and angle < 67.5)) {
neigh1 = gradMag.at<u_char>(i - 1, j + 1);
neigh2 = gradMag.at<u_char>(i + 1, j - 1);
} else if ((angle >= 67.5 and angle < 112.5)) {
neigh1 = gradMag.at<u_char>(i - 1, j);
neigh2 = gradMag.at<u_char>(i + 1, j);
} else if ((angle >= 112.5 and angle < 157.5)) {
neigh1 = gradMag.at<u_char>(i - 1, j - 1);
neigh2 = gradMag.at<u_char>(i + 1, j + 1);
} else {
neigh1 = neigh2 = 0;
}
if (gradMag.at<u_char>(i, j) >= max(neigh1, neigh2))
result.at<u_char>(i, j) = result.at<u_char>(i, j);
else
result.at<u_char>(i, j) = 0;
}
}
return result;
}
int main() {
string imPath =
"/home/panirpal/workspace/Projects/ComputerVision/data/chess2.jpg";
Mat img = imread(imPath, IMREAD_GRAYSCALE);
if (!img.empty()) {
Mat gaussian = guassianFilterTransform(img);
Mat out = detectHarrisCorners(gaussian);
displayImage(out);
} else
cerr << "image not found! exiting...";
}
#endif
Tried to play around with the magic numbers like threshold value and k value of 0.04 in computeCornerResponseFunc
To No use ends up detecting edges.
Edit : Made the suggested Changes, It is able to detect some corners now, But I feel Not quite there yet. Updated code. Although Non max impression isn't performed, but I feel it more of a post processing step on the base algorithm, here I think computing Mat GradXYSqSmooth = guassianFilterTransform(GradXY, 1.3);
is Introducing some noise, I wonder if there is something I'm doing wrong here, Or should that noise be filtered out! By using something like median filter?
#include <algorithm>
#include <math.h>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <vector>
using namespace std;
using namespace cv;
void displayImage(Mat &img, unsigned int time = 0, string title = "frame") {
imshow(title, img);
waitKey(time);
}
static double getDoubleValOf(Mat &img, int x, int y) {
return static_cast<double>(img.at<u_char>(x, y));
}
static double square(double x) { return x * x; }
static double computeGaussianFunc(double x, double y, double mu, double sigma) {
double val =
exp(-0.5 * ((square((x - mu) / sigma)) + square((y - mu) / sigma))) /
(2 * M_PI * sigma * sigma);
return val;
}
static vector<vector<double>> getGuassianKernal(double sigma) {
int size = 2 * ceil(3 * sigma) + 1;
vector<vector<double>> Kernal(size, vector<double>(size, 0.0));
double sum = 0.0;
int center = size / 2;
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
Kernal[i][j] = computeGaussianFunc(i, j, center, sigma);
sum += Kernal[i][j];
}
}
if (sum != 0) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
Kernal[i][j] /= sum;
}
}
}
return Kernal;
}
static Mat guassianFilterTransform(Mat &img, double Sigma) {
vector<vector<double>> filter = getGuassianKernal(Sigma);
int FiltSize = filter.size();
int trows = img.rows - FiltSize + 1;
int tcols = img.cols - FiltSize + 1;
// Final output
Mat transformed(trows, tcols, CV_8U);
for (int i = 0; i < trows; i++) {
for (int j = 0; j <tcols; j++) {
double tval = 0;
for (int x = 0; x <FiltSize; x++) {
for (int y = 0; y <FiltSize; y++) {
tval = tval + (filter[x][y] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
tval = min(tval,255.0);
tval = max(tval,0.0);
}
}
transformed.at<u_char>(i,j) = static_cast<u_char>(round(tval));
}
}
return (transformed);
}
double computeCornerResponseFunc(vector<vector<double>> &StructMat) {
double det =
StructMat[0][0] * StructMat[1][1] - StructMat[0][1] * StructMat[1][0];
double trace = StructMat[0][0] + StructMat[1][1];
return (det - (0.04) * (trace * trace));
}
Mat computeStructureTensorMatrixAndCornerResponse(Mat &GradXSqSmooth,
Mat &GradYSqSmooth,
Mat &GradXYSqSmooth) {
int rows = GradXSqSmooth.rows;
int cols = GradXSqSmooth.cols;
Mat ResponseMat(rows, cols, CV_8U);
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
vector<vector<double>> STM = {{getDoubleValOf(GradXSqSmooth, i, j),
getDoubleValOf(GradXYSqSmooth, i, j)},
{getDoubleValOf(GradXYSqSmooth, i, j),
getDoubleValOf(GradYSqSmooth, i, j)}};
double CornerRes = computeCornerResponseFunc(STM);
if (CornerRes >= 200)
ResponseMat.at<u_char>(i, j) = 255;
else
ResponseMat.at<u_char>(i, j) = 0;
}
}
return ResponseMat;
}
// Try to Optimize for GradX this is not required we can directly compute
// GradXSq.
Mat detectHarrisCornersUpd(Mat &img) {
int filtSize = 3;
vector<vector<int>> filtery, filterx;
filtery = {{-1, -2, -1}, {0, 0, 0}, {1, 2, 1}};
filterx = {{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}};
int trows = img.rows - filtSize + 1;
int tcols = img.cols - filtSize + 1;
Mat GradX(trows, tcols, CV_64F);
Mat GradY(trows, tcols, CV_64F);
Mat gradDir(trows, tcols, CV_64F);
Mat result(trows, tcols, CV_8U);
double Thrs = 200;
/*
1) Derivative Calculation:
Calculate the horizontal (Ix) and vertical (Iy) derivatives of the grayscale
image using the Sobel operator: Ix = Sobel(I_gray, dx) Iy = Sobel(I_gray,
dy) dx and dy are the Sobel operators in the x and y directions,
respectively.
*/
for (int i = 1; i <= trows - 2; i++) {
for (int j = 1; j <= tcols - 2; j++) {
double gradX = 0;
double gradY = 0;
// Compute Sobel Gradients.
for (int x = -1; x <= 1; x++) {
for (int y = -1; y <= 1; y++) {
gradX = gradX + (filterx[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
gradY = gradY + (filtery[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
}
}
GradX.at<double>(i, j) = gradX;
GradY.at<double>(i, j) = gradY;
// GradDir required for non maximum supression
double tval = sqrt(gradX * gradX + gradY * gradY);
if (gradX == 0.00) {
gradDir.at<double>(i, j) = (gradY >= 0 ? M_PI / 2.0 : -M_PI / 2.0);
} else {
gradDir.at<double>(i, j) = atan(gradY / gradX);
}
gradDir.at<double>(i, j) *= (180.0 / M_PI);
gradDir.at<double>(i, j) = abs(gradDir.at<double>(i, j));
}
}
/*
3. Derivative Products:
Element wise product
Ix2 = Ix * Ix
Iy2 = Iy * Iy
Ixy = Ix * Iy
*/
Mat GradXSq(trows, tcols, CV_64F);
Mat GradYSq(trows, tcols, CV_64F);
Mat GradXY(trows, tcols, CV_64F);
for (int i = 0; i < trows; i++) {
for (int j = 0; j < tcols; j++) {
GradXSq.at<double>(i, j) =
GradX.at<double>(i, j) * GradX.at<double>(i, j);
GradYSq.at<double>(i, j) =
GradY.at<double>(i, j) * GradY.at<double>(i, j);
GradXY.at<double>(i, j) = GradX.at<double>(i, j) * GradY.at<double>(i, j);
GradXSq.at<u_char>(i, j) =
static_cast<u_char>(min(GradXSq.at<double>(i, j), 255.0));
GradYSq.at<u_char>(i, j) =
static_cast<u_char>(min(GradYSq.at<double>(i, j), 255.0));
GradXY.at<u_char>(i, j) =
static_cast<u_char>(min(GradXY.at<double>(i, j), 255.0));
}
}
/*
Gaussian Smoothing:
Apply Gaussian smoothing to each of the derivative products to reduce noise
sensitivity: Ix2_smooth = GaussianBlur(Ix2, kernel_size) Iy2_smooth =
GaussianBlur(Iy2, kernel_size) Ixy_smooth = GaussianBlur(Ixy, kernel_size)
*/
Mat GradXSqSmooth = guassianFilterTransform(GradXSq, 1.3);
displayImage(GradXSqSmooth);
Mat GradYSqSmooth = guassianFilterTransform(GradYSq, 1.3);
displayImage(GradYSqSmooth);
Mat GradXYSqSmooth = guassianFilterTransform(GradXY, 1.3);
displayImage(GradXYSqSmooth);
Mat StructTensorMat = computeStructureTensorMatrixAndCornerResponse(
GradXSqSmooth, GradYSqSmooth, GradXYSqSmooth);
return StructTensorMat;
}
int main() {
string imPath =
"/home/panirpal/workspace/Projects/ComputerVision/data/images/chess2.jpg";
Mat img = imread(imPath, IMREAD_GRAYSCALE);
if (!img.empty()) {
displayImage(img);
Mat gaussian = guassianFilterTransform(img,1.3);
Mat out = detectHarrisCornersUpd(gaussian);
displayImage(out);
} else
cerr << "image not found! exiting...";
return 0;
}
There are a few issues with your code. The most important one is that you are not applying the smoothing to the squared gradient images.
gradX =
gradX + gaussianKernal[x + 1][y + 1] *
(filterx[x + 1][y + 1] *
static_cast<double>(img.at<u_char>(i + x, j + y)));
Here you are computing the convolution of the image with a kernel formed by element-wise multiplication of the Sobel kernel and the Gaussian kernel. This just means you're not using the Sobel kernel, but something else entirely that I can't immediately analyze.
Element-wise multiplication and convolution are two different things, and they don't commute. The structure tensor M (as described in the link you provide) has, as the first element, G*(IxIx). Ix is the result of the Sobel filter. IxIx is the square of that (element-wise multiplication of the result, as you did). But the G* part is a convolution with the Gaussian. This smoothing has to be applied after the element-wise multiplication. This is not the same as (G*Ix)(G*Ix)!
Here's another issue:
static_cast<u_char>(tval)
This casts a double tval
to 8-bit unsigned integer. tval
is guaranteed to be non-negative, but it could very well be larger than 255. If so, the cast will wrap, the value 256 will be written as 0. You have to clamp the value first:
static_cast<u_char>(std::min(tval,255.0))
This one in performDiff
is worse:
int d = (static_cast<int>(img1.at<u_char>(i, j)) -
static_cast<int>(img2.at<u_char>(i, j)));
diff.at<u_char>(i, j) = d
The result of the subtraction cannot be represented in an 8-bit unsigned integer. Where img2
is larger, diff
will have some positive value, for example -1 will be represented as 255. You need to take the absolute difference, and do the same clamping as I showed above.
The call getGuassianKernal(3, 1.5)
does not return a Gaussian. Cutting off a Gaussian with sigma 1.5 to 3 pixels leaves something much more similar to a filter with uniform weights than a Gaussian. I explain this in more detail here. For a sigma of 1.5 we'd typically use a kernel of size 2*ceil(3*1.5)+1
, which is 11. At the very minimum you need 2*ceil(2*1.5)+1 = 7
(less precise but faster).