I would appreciate some help understanding the output of a small educational program I put together. I am new to OpenCV and don't have much C++ experience.
The goal of the script is to do the following: load an image, compute its DFT, mask the spectrum with a circle of increasing radius, and display the inverse transform of the masked spectrum.
I have the basic functionality working: I can load the image, perform the DFT, view the output image, advance through a for-loop that increases the circle radius (the radius follows the loop variable i), and see the result of the inverse transform of the modified spectrum.
However, I do not understand why the output shows a vertically flipped copy of the input superimposed on the image (see the example below with Lena.png). This is not my intended result. When I imshow() the inverse DFT result without applying the mask, I get a normal, non-doubled image; but after applying the mask, the IDFT output looks like this:
I am not looking for a quick solution: I would really appreciate it if someone more experienced could ask leading questions to help me understand this result, so that I can try to fix it myself.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
using namespace cv;
void expand_img_to_optimal(Mat &padded, Mat &img);
void fourier_transform(Mat &img);
int main(int argc, char **argv)
{
    Mat input_img;
    input_img = imread("Lena.png", IMREAD_GRAYSCALE);
    if (input_img.empty())
    {
        fprintf(stderr, "Could not Open image\n\n");
        return -1;
    }

    fourier_transform(input_img);
    return 0;
}

void fourier_transform(Mat &img)
{
    Mat padded;
    expand_img_to_optimal(padded, img);

    Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
    Mat complexI;
    merge(planes, 2, complexI);

    dft(complexI, complexI, DFT_COMPLEX_OUTPUT);

    // For-loop to increase mask circle radius incrementally
    for (int i = 0; i < 400; i += 10) {
        Mat mask = Mat::ones(complexI.size(), complexI.type());
        mask.convertTo(mask, CV_8U);

        Mat dest = Mat::ones(complexI.size(), complexI.type());

        circle(mask, Point(mask.cols/2, mask.rows/2), i, 0, -1, 8, 0);
        complexI.copyTo(dest, mask);

        Mat inverseTransform;
        idft(dest, inverseTransform, DFT_INVERSE|DFT_REAL_OUTPUT);
        normalize(inverseTransform, inverseTransform, 0, 1, NORM_MINMAX);

        imshow("Reconstructed", inverseTransform);
        waitKey(0);
    }
}

void expand_img_to_optimal(Mat &padded, Mat &img) {
    int row = getOptimalDFTSize(img.rows);
    int col = getOptimalDFTSize(img.cols);
    copyMakeBorder(img, padded, 0, row - img.rows, 0, col - img.cols, BORDER_CONSTANT, Scalar::all(0));
}
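For reference, here is a minimal sketch of how the spectrum itself can be displayed while debugging. show_log_magnitude is just a throwaway helper name, and it takes the CV_32FC2 complexI produced by dft() in the code above:

// Minimal helper: display the log-magnitude of a CV_32FC2 spectrum.
void show_log_magnitude(const Mat &complexI)
{
    // Split the complex spectrum into its real and imaginary planes.
    Mat planes[2];
    split(complexI, planes);

    // Per-element magnitude: sqrt(Re^2 + Im^2).
    Mat mag;
    magnitude(planes[0], planes[1], mag);

    // Switch to a log scale so the bright DC term does not drown out everything else.
    // Note that the DC term sits at the top-left corner, since no quadrant swap is applied.
    mag += Scalar::all(1);
    log(mag, mag);

    normalize(mag, mag, 0, 1, NORM_MINMAX);
    imshow("Spectrum (log magnitude)", mag);
    waitKey(0);
}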
Firstly, I'd like to thank @Cris Luengo for his helpful input on implementing the ifftshift in OpenCV.
In the end, the problem with my code was in this line:
Mat mask = Mat::ones(complexI.size(), complexI.type());
Instead of using the type of complexI, it looks like I should have used the type of img (which, for a grayscale image, is just CV_8U, as in the code below):
Mat mask = Mat::ones(complexI.size(), img.type());
Why? I'm not sure yet; I'm still trying to understand (a small comparison of the two mask constructions is at the end of this post). Here is my complete code, which is working how I intended:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
using namespace cv;
void expand_img_to_optimal(Mat &padded, Mat &img);
void fourier_transform(Mat &img);
void ifft_shift(Mat &mask);
int main(int argc, char **argv)
{
    Mat input_img;
    input_img = imread("Lena.png", IMREAD_GRAYSCALE);
    if (input_img.empty())
    {
        fprintf(stderr, "Could not Open image\n\n");
        return -1;
    }

    fourier_transform(input_img);
    return 0;
}

void fourier_transform(Mat &img)
{
    Mat padded;
    expand_img_to_optimal(padded, img);

    Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
    Mat complexI;
    merge(planes, 2, complexI);

    dft(complexI, complexI, DFT_COMPLEX_OUTPUT);

    for (float i = 0; i < 4000; i += 2) {
        // Create disk mask matrix
        Mat mask = Mat::ones(complexI.size(), CV_8U);
        circle(mask, Point(mask.cols/2, mask.rows/2), i, 0, -1, 8, 0);

        // Perform ifft shift
        ifft_shift(mask);

        // Destination matrix for masked spectrum
        Mat dest;
        complexI.copyTo(dest, mask);

        // Perform inverse DFT
        Mat inverseTransform;
        idft(dest, inverseTransform, DFT_INVERSE|DFT_REAL_OUTPUT);
        normalize(inverseTransform, inverseTransform, 0, 1, NORM_MINMAX);

        imshow("Reconstructed", inverseTransform);
        waitKey(0);
    }
}

void expand_img_to_optimal(Mat &padded, Mat &img) {
    int row = getOptimalDFTSize(img.rows);
    int col = getOptimalDFTSize(img.cols);
    copyMakeBorder(img, padded, 0, row - img.rows, 0, col - img.cols, BORDER_CONSTANT, Scalar::all(0));
}
void ifft_shift(Mat &mask) {
    // input sizes
    int sx = mask.cols;
    int sy = mask.rows;

    // input origin (the centre the circle was drawn around)
    int cx = sx / 2;
    int cy = sy / 2;

    // split the quadrants
    Mat top_left(mask, Rect(0, 0, cx, cy));
    Mat top_right(mask, Rect(cx, 0, sx - cx, cy));
    Mat bottom_left(mask, Rect(0, cy, cx, sy - cy));
    Mat bottom_right(mask, Rect(cx, cy, sx - cx, sy - cy));

    // merge the quadrants in the right order, so the centre of the mask
    // moves to the (0,0) corner where dft() puts the DC term
    Mat tmp1, tmp2;
    hconcat(bottom_right, bottom_left, tmp1);
    hconcat(top_right, top_left, tmp2);
    vconcat(tmp1, tmp2, mask);
}
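A side note on the type issue, for anyone else puzzling over it: below is a small standalone sketch (separate from the code above) comparing the two mask constructions. It relies only on documented behaviour, namely that Mat::ones() initialises just the first channel of a multi-channel matrix to 1 (the other channels are set to 0), and that copyTo() expects an 8-bit mask.

#include <opencv2/core/core.hpp>
#include <iostream>

using namespace cv;

int main()
{
    // Stand-in for the CV_32FC2 spectrum produced by dft() in the code above.
    Mat complexI = Mat::zeros(4, 4, CV_32FC2);

    // Mask built from the spectrum's type: two float channels, and Mat::ones()
    // only sets the first channel of each element to 1.
    Mat mask_from_spectrum = Mat::ones(complexI.size(), complexI.type());
    std::cout << "spectrum-typed mask: channels = " << mask_from_spectrum.channels()
              << ", first element = " << mask_from_spectrum.at<Vec2f>(0, 0) << std::endl;
    // prints: channels = 2, first element = [1, 0]

    // Mask built as a single-channel 8-bit image, as in the working code above.
    Mat mask_8u = Mat::ones(complexI.size(), CV_8U);
    std::cout << "CV_8U mask: channels = " << mask_8u.channels()
              << ", first element = " << (int)mask_8u.at<uchar>(0, 0) << std::endl;
    // prints: channels = 1, first element = 1

    return 0;
}

So even after the convertTo(mask, CV_8U) call in my original code (convertTo changes only the depth, not the number of channels), the mask was still a two-channel matrix whose second channel was zero everywhere. I suspect this is related to why it behaves differently from a plain CV_8U mask in copyTo(), but I haven't verified the exact masking semantics.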