I have a distorted image in YUY2 format. YUY2 belongs to the YUV 4:2:2 family (not 4:2:0).
I also have mapx and mapy (height 720, width 1280), which I obtained from
cv::fisheye::initUndistortRectifyMap(K, D, cv::Mat::eye(3, 3, CV_64F), new_K, Size, CV_32FC1, mapx, mapy);
How can I get an undistorted YUY2 image? My final goal is undistorted YUY2 output (not BGR).
I thought to perform below steps:
cv::cvtColor(YUY, BGR, cv::COLOR_YUV2BGR_YUY2);
// then perform remapping
// and convert back to YUY2
But there is no conversion from BGR back to YUY2 (nothing like COLOR_BGR2YUV_YUY2).
Is there any smarter way?
I first tried modifying mapx and mapy to make them suitable for YUV422. The result is very good in terms of computation time, since only one remapping is required in real time, but the quality is not the best.
Then I tried YUV422 -> YUV444 -> remap(YUV444) -> YUV422 through libswscale, but again YUV conversion was taking time.
Finally, I developed CUDA kernels for the YUV conversions. I have attached them below.
// nvcc -c -o colorConversion.o colorConversion.cu `pkg-config --libs --cflags opencv4`
// /usr/bin/g++ -g -O3 /home/jai/vscode/opencvCUDA/cuda3.cpp -o /home/jai/vscode/opencvCUDA/cuda3 colorConversion.o `pkg-config --libs --cflags opencv4` `pkg-config --libs --cflags gstreamer-1.0` `pkg-config --libs --cflags cuda-11.3` `pkg-config --libs --cflags cudart-11.3`
#include "colorConversion.h"
/**
 * Expands packed YUY2 (YUV 4:2:2) into interleaved YUV 4:4:4.
 *
 * Input layout: each pair of horizontally adjacent uchar2 elements forms one
 * macropixel, (Y0, U) followed by (Y1, V). Both output pixels of the pair
 * receive the shared U and V samples.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread handles one macropixel,
 * i.e. output columns 2*t and 2*t + 1 of one row. Any grid that covers
 * (cols / 2) x rows threads is valid; surplus threads exit at the guard.
 * No shared memory is used.
 *
 * Preconditions: YUV422 and YUV have identical rows/cols, and cols is even.
 *
 * @param YUV422  source image, one uchar2 per pixel (Y, then U or V).
 * @param YUV     destination image, one uchar3 per pixel (Y, U, V).
 */
__global__ void kernel_YUY422toYUY(cv::cuda::PtrStepSz<uchar2> YUV422, cv::cuda::PtrStepSz<uchar3> YUV)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = 2 * (blockIdx.x * blockDim.x + threadIdx.x); // even column of the pair

    // Guard the grid tail: the grid rarely divides the image evenly.
    if (row >= YUV.rows || col + 1 >= YUV.cols) {
        return;
    }

    // Parity is implied by the global column (col is always even), so the
    // result no longer depends on the block size being even.
    const uchar2 first = YUV422(row, col);      // (Y0, U)
    const uchar2 second = YUV422(row, col + 1); // (Y1, V)

    YUV(row, col) = make_uchar3(first.x, first.y, second.y);
    YUV(row, col + 1) = make_uchar3(second.x, first.y, second.y);
}

/**
 * Host wrapper: converts a packed YUY2 GpuMat (CV_8UC2) into a YUV 4:4:4
 * GpuMat (CV_8UC3) of the same size.
 *
 * The output is (re)allocated with create(), which does nothing when the
 * caller already provided a matrix of the right size and type. The kernel is
 * launched on the default stream and is not synchronized here.
 *
 * @param YUV422gpu  source image, CV_8UC2, even width.
 * @param YUVgpu     destination image, CV_8UC3, same size as the source.
 * @throws cv::Exception if the input type/width is invalid or the launch fails.
 */
void YUY422toYUY(const cv::cuda::GpuMat &YUV422gpu, cv::cuda::GpuMat &YUVgpu)
{
    CV_Assert(YUV422gpu.type() == CV_8UC2);
    CV_Assert(YUV422gpu.cols % 2 == 0); // YUY2 stores chroma per pixel pair

    YUVgpu.create(YUV422gpu.size(), CV_8UC3);
    if (YUV422gpu.empty()) {
        return; // a zero-sized grid would be an invalid launch configuration
    }

    // Fixed block size (256 threads) keeps the launch valid for any image
    // width; a block of cols / 2 threads exceeds the 1024-thread limit once
    // the width passes 2048.
    const int blockX = 32;
    const int blockY = 8;
    const int pairsPerRow = YUVgpu.cols / 2;
    const dim3 block(blockX, blockY);
    const dim3 grid((pairsPerRow + blockX - 1) / blockX, (YUVgpu.rows + blockY - 1) / blockY);

    kernel_YUY422toYUY<<<grid, block>>>(YUV422gpu, YUVgpu);

    // Kernel launches do not return errors directly; surface bad configurations.
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(launchStatus));
    }
}
/**
 * Packs interleaved YUV 4:4:4 into YUY2 (YUV 4:2:2).
 *
 * Each pair of horizontally adjacent source pixels becomes one macropixel:
 * (Y0, U) followed by (Y1, V), where U and V are the truncating integer
 * averages of the two source chroma samples.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread handles one macropixel,
 * i.e. columns 2*t and 2*t + 1 of one row. Any grid that covers
 * (cols / 2) x rows threads is valid; surplus threads exit at the guard.
 * No shared memory is used.
 *
 * Preconditions: YUV and YUV422 have identical rows/cols, and cols is even.
 *
 * @param YUV     source image, one uchar3 per pixel (Y, U, V).
 * @param YUV422  destination image, one uchar2 per pixel (Y, then U or V).
 */
__global__ void kernel_YUYtoYUY422(cv::cuda::PtrStepSz<uchar3> YUV, cv::cuda::PtrStepSz<uchar2> YUV422)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = 2 * (blockIdx.x * blockDim.x + threadIdx.x); // even column of the pair

    // Guard the grid tail: the grid rarely divides the image evenly.
    if (row >= YUV422.rows || col + 1 >= YUV422.cols) {
        return;
    }

    const uchar3 left = YUV(row, col);
    const uchar3 right = YUV(row, col + 1);

    // Operands promote to int, so the sums cannot overflow before the divide.
    const unsigned char u = static_cast<unsigned char>((left.y + right.y) / 2);
    const unsigned char v = static_cast<unsigned char>((left.z + right.z) / 2);

    YUV422(row, col) = make_uchar2(left.x, u);
    YUV422(row, col + 1) = make_uchar2(right.x, v);
}

/**
 * Host wrapper: converts a YUV 4:4:4 GpuMat (CV_8UC3) into a packed YUY2
 * GpuMat (CV_8UC2) of the same size.
 *
 * The output is (re)allocated with create(), which does nothing when the
 * caller already provided a matrix of the right size and type. The kernel is
 * launched on the default stream and is not synchronized here.
 *
 * @param YUVgpu     source image, CV_8UC3, even width.
 * @param YUV422gpu  destination image, CV_8UC2, same size as the source.
 * @throws cv::Exception if the input type/width is invalid or the launch fails.
 */
void YUYtoYUY422(const cv::cuda::GpuMat &YUVgpu, cv::cuda::GpuMat &YUV422gpu)
{
    CV_Assert(YUVgpu.type() == CV_8UC3);
    CV_Assert(YUVgpu.cols % 2 == 0); // YUY2 stores chroma per pixel pair

    YUV422gpu.create(YUVgpu.size(), CV_8UC2);
    if (YUVgpu.empty()) {
        return; // a zero-sized grid would be an invalid launch configuration
    }

    // Fixed block size (256 threads) keeps the launch valid for any image
    // width; a block of cols / 2 threads exceeds the 1024-thread limit once
    // the width passes 2048.
    const int blockX = 32;
    const int blockY = 8;
    const int pairsPerRow = YUV422gpu.cols / 2;
    const dim3 block(blockX, blockY);
    const dim3 grid((pairsPerRow + blockX - 1) / blockX, (YUV422gpu.rows + blockY - 1) / blockY);

    kernel_YUYtoYUY422<<<grid, block>>>(YUVgpu, YUV422gpu);

    // Kernel launches do not return errors directly; surface bad configurations.
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(launchStatus));
    }
}
And then I do the remapping, again using CUDA, with the following lines of code:
// Per-frame pipeline: packed YUV422 -> YUV444 -> remap -> packed YUV422.
// NOTE(review): YUV422 and dst are presumably host cv::Mat objects and the
// other operands GpuMat objects; their declarations are not shown here.
YUV422GPU.upload(YUV422); // packed YUV422 frame, 2 channels per pixel
// Two 3-channel buffers of H x W: one for the expanded frame, one for the remap output.
YUV1.create(H, W, CV_8UC3);
YUV2.create(H, W, CV_8UC3);
YUY422toYUY(YUV422GPU, YUV1); // expand to 3-channel YUV so every pixel carries its own U and V
cv::cuda::remap(YUV1, YUV2, mapxGPU, mapyGPU, interpolationMethod); // remap the 3-channel YUV image with the given maps
YUYtoYUY422(YUV2, YUV422GPU); // pack back to 2-channel YUV422, reusing the input buffer
YUV422GPU.download(dst); // dst is the final YUV422, 2-channel image