cuda rgb yuv npp

How can I properly use nppiYUV422ToRGB_8u_C2C3R()?


I am trying to convert a YUV422 image into RGB image using CPU and GPU.

[On CPU] I tried using cv::cvtColor() as shown below:

cv::cvtColor(mat_UYVY, mat_bgr, cv::COLOR_YUV2BGR_UYVY);

And it works fine. See the picture attached here.

(I removed the image because it contains somewhat private information.)

[But on GPU] I tried using nppiYUV422ToRGB_8u_C2C3R() as shown below:

    NppStatus status = nppiYUV422ToRGB_8u_C2C3R(gpu_buff_UYVY,
                                                img_size.width*2,
                                                gpu_buff_RGB,
                                                img_size.width*3,
                                                roi);

But it produces some weird image as attached here.

Problematic result produced on GPU

I know OpenCV works with BGR images and that nppiYUV422ToRGB_8u_C2C3R() produces an RGB image. But the problem seems to be more than this: I tried converting RGB to BGR, and the problem remained. Something else must be wrong.

Can someone give me some advice? Any help would be appreciated. Thank you!

#include <iterator>
#include <fstream>
#include <iostream>
#include "opencv2/opencv.hpp"
#include "nppdefs.h"
#include "nppi_support_functions.h"
#include "nppi_color_conversion.h"



// Reproducer: loads a raw 1920x1080 UYVY 4:2:2 frame from "uyvy422.raw",
// converts it to BGR on the CPU with OpenCV and to RGB on the GPU with NPP,
// and shows/saves both results so they can be compared.
int main()
{
  cv::Size img_size(1920, 1080);
  // Packed 4:2:2 input: 2 bytes per pixel.
  unsigned char *buff_UYVY =
    new unsigned char[img_size.width * img_size.height * 2];
  // Packed 3-channel output: 3 bytes per pixel.
  unsigned char *buff_RGB =
    new unsigned char[img_size.width * img_size.height * 3];

  // Read the raw file into buff_UYVY.
  {
    std::string   file_uyvy("uyvy422.raw");
    std::ifstream stream_uyvy;
    stream_uyvy.open(file_uyvy, std::ios::in | std::ios::binary);

    if (!stream_uyvy.is_open())
    {
      std::cerr << "[ERROR] cannot open the raw file " << file_uyvy
        << std::endl;
      std::cerr << std::endl;
      // NOTE(review): assert is compiled out when NDEBUG is defined; in that
      // case execution continues and buff_UYVY is used uninitialised.
      assert(0);
    }
    // NOTE(review): the number of bytes actually read is not checked, so a
    // short file leaves the tail of buff_UYVY uninitialised.
    stream_uyvy.read((char*)buff_UYVY, img_size.width*img_size.height*2);
    stream_uyvy.close();
  }


  // CPU reference path: wrap buff_UYVY in a 2-channel Mat and let OpenCV
  // decode it as UYVY into a BGR image.
  cv::Mat mat_UYVY(img_size, CV_8UC2, buff_UYVY);
  cv::Mat mat_bgr(img_size, CV_8UC3);
  cv::cvtColor(mat_UYVY, mat_bgr, cv::COLOR_YUV2BGR_UYVY);
  cv::imshow("BGR Image from CPU", mat_bgr);
  cv::imwrite("mat_bgr.bmp", mat_bgr);
  cv::imwrite("mat_bgr.jpg", mat_bgr);


  // GPU path: upload buff_UYVY, convert with NPP, download into buff_RGB.
  // Each CUDA/NPP status code is printed but not acted upon.
  {
    Npp8u* gpu_buff_UYVY;
    Npp8u* gpu_buff_RGB;
    cudaError_t err_cu_api;
    err_cu_api = cudaMalloc((void**)&gpu_buff_UYVY,
                            img_size.width*img_size.height*2);
    std::cout << "cudaMalloc1 : " << err_cu_api << std::endl;
    err_cu_api = cudaMemcpy((void*)gpu_buff_UYVY,
                            (const void*)buff_UYVY,
                            img_size.width*img_size.height*2,
                            cudaMemcpyHostToDevice);
    std::cout << "cudaMemcpy2 : " << err_cu_api << std::endl;


    err_cu_api = cudaMalloc((void**)&gpu_buff_RGB,
                            img_size.width*img_size.height*3);
    std::cout << "cudaMalloc3 : " << err_cu_api << std::endl;

    // Source and destination row steps are given in bytes (tightly packed
    // rows: width*2 for the 4:2:2 input, width*3 for the RGB output).
    // NOTE(review): the same bytes were decoded above as UYVY
    // (U0 Y0 V0 Y1). Confirm that this matches the byte order
    // nppiYUV422ToRGB_8u_C2C3R expects (presumably Y0 U0 Y1 V0) -- a
    // mismatch would swap luma and chroma and explain wrong colours.
    NppiSize roi = {img_size.width, img_size.height};
    NppStatus status = nppiYUV422ToRGB_8u_C2C3R(gpu_buff_UYVY,
                                                img_size.width*2,
                                                gpu_buff_RGB,
                                                img_size.width*3,
                                                roi);
    std::cout << "NppStatus : " << status << std::endl;

    err_cu_api = cudaMemcpy((void*) buff_RGB,
                            (const void*)gpu_buff_RGB,
                            img_size.width*img_size.height*3,
                            cudaMemcpyDeviceToHost);
    std::cout << "cudaMemcpy4 : " << err_cu_api << std::endl;
    cudaFree(gpu_buff_UYVY);
    cudaFree(gpu_buff_RGB);
  }

  // Wrap the downloaded buffer in a Mat header. The data is in R,G,B channel
  // order while imshow/imwrite treat 3-channel data as B,G,R; the channel
  // swap below is deliberately left disabled.
  cv::Mat mat_rgb(img_size, CV_8UC3, buff_RGB);
//cv::cvtColor(mat_rgb, mat_rgb, cv::COLOR_RGB2BGR);

  // Dump the Mat layout for diagnosis.
  std::cout << "depth : " << mat_rgb.depth() << std::endl;
  std::cout << "channels : " << mat_rgb.channels() << std::endl;
  std::cout << "elemSize : " << mat_rgb.elemSize() << std::endl;
  std::cout << "step1 : " << mat_rgb.step1() << std::endl;
  std::cout << "type : " << mat_rgb.type() << std::endl;

  try {
    cv::imshow("RGB Image from GPU", mat_rgb);
    cv::imwrite("mat_rgb.bmp", mat_rgb);
    cv::imwrite("mat_rgb.jpg", mat_rgb);
  } catch( cv::Exception& e ) {
    const char* err_msg = e.what();
    std::cout << "exception caught #2: " << err_msg << std::endl;
  }

  // NOTE(review): with waitKey disabled the imshow windows presumably never
  // get a chance to render before the program exits -- confirm.
//  cv::waitKey(0);

  delete[] buff_UYVY;
  delete[] buff_RGB;

  return 0;
}

Output message is as shown below:

cudaMalloc1 : 0
cudaMemcpy2 : 0
cudaMalloc3 : 0
NppStatus : 0
cudaMemcpy4 : 0
depth : 0
channels : 3
elemSize : 3
step1 : 5760
type : 16

Solution

  • I think the main problem you have is that the OpenCV UYVY format does not match the storage order of the NPP YUV422 format.

    OpenCV UYVY storage format is: U0 Y0 V0 Y1

    NPP format is: Y0 U0 Y1 V0

    I wasn't able to locate any "raw" UYVY encoded image files on the internet, and you haven't provided one either. So I elected to use a synthetic image. Here is a test case:

    $ cat t30.cu
    #include <iterator>
    #include <fstream>
    #include <iostream>
    #include "opencv2/opencv.hpp"
    #include "nppdefs.h"
    #include "nppi_support_functions.h"
    #include "nppi_color_conversion.h"
    
    
    
    int main()
    {
      cv::Size img_size(1920, 1080);
      unsigned char *buff_UYVY =
        new unsigned char[img_size.width * img_size.height * 2];
      unsigned char *buff_RGB =
        new unsigned char[img_size.width * img_size.height * 3];
    #if 0
      //file >>> buff_UYVY
      {
        std::string   file_uyvy("uyvy422.raw");
        std::ifstream stream_uyvy;
        stream_uyvy.open(file_uyvy, std::ios::in | std::ios::binary);
    
        if (!stream_uyvy.is_open())
        {
          std::cerr << "[ERROR] cannot open the raw file " << file_uyvy
            << std::endl;
          std::cerr << std::endl;
          assert(0);
        }
        stream_uyvy.read((char*)buff_UYVY, img_size.width*img_size.height*2);
        stream_uyvy.close();
      }
    #endif
      // create synthetic R,G,B,Black image
      unsigned char r,g,b;
      for (int i = 0; i < img_size.height; i++) 
        for (int j = 0; j < img_size.width; j+=3) {
          if (j < img_size.width/3) {r = 200; g = 0; b = 0;}
          else if (j < img_size.width*2/3) {r = 0; g = 200; b = 0;}
          else {r = 0; g = 0; b = 200;}
          buff_RGB[i*img_size.width*3+j] = r;
          buff_RGB[i*img_size.width*3+j+1] = g;
          buff_RGB[i*img_size.width*3+j+2] = b;}
      Npp8u* gpu_buff_UYVY;
      Npp8u* gpu_buff_RGB;
      // use NPP to convert synthetic RGB image into NPP format YUV422
      cudaError_t err_cu_api;
      err_cu_api = cudaMalloc((void**)&gpu_buff_UYVY,
                                img_size.width*img_size.height*2);
      std::cout << "cudaMalloc1 : " << err_cu_api << std::endl;
      err_cu_api = cudaMalloc((void**)&gpu_buff_RGB,
                                img_size.width*img_size.height*3);
      std::cout << "cudaMalloc3 : " << err_cu_api << std::endl;
      err_cu_api = cudaMemcpy((void*)gpu_buff_RGB,
                                (const void*)buff_RGB,
                                img_size.width*img_size.height*3,
                                cudaMemcpyHostToDevice);
      std::cout << "cudaMemcpy2 : " << err_cu_api << std::endl;
      NppiSize roi = {img_size.width, img_size.height};
      NppStatus status = nppiRGBToYUV422_8u_C3C2R(gpu_buff_RGB,
                                                    img_size.width*3,
                                                    gpu_buff_UYVY,
                                                    img_size.width*2,
                                                    roi);
      std::cout << "NppStatus : " << status << std::endl;
      err_cu_api = cudaMemcpy((void*) buff_UYVY,
                                (const void*)gpu_buff_UYVY,
                                img_size.width*img_size.height*2,
                                cudaMemcpyDeviceToHost);
      std::cout << "cudaMemcpy4 : " << err_cu_api << std::endl;
      // convert NPP format YUV422 to UYVY for use by OpenCV
      for (int i = 0; i < img_size.width*img_size.height*2; i+=2){
        unsigned char v1 = buff_UYVY[i];
        unsigned char v2 = buff_UYVY[i+1];
        buff_UYVY[i+1] = v1;
        buff_UYVY[i] = v2;}
      //buff_UYVY >>> mat_bgr
      cv::Mat mat_UYVY(img_size, CV_8UC2, buff_UYVY);
      cv::Mat mat_bgr(img_size, CV_8UC3);
      cv::cvtColor(mat_UYVY, mat_bgr, cv::COLOR_YUV2BGR_UYVY);
      cv::imshow("BGR Image from CPU", mat_bgr);
      cv::imwrite("mat_bgr.bmp", mat_bgr);
      cv::imwrite("mat_bgr.jpg", mat_bgr);
      //convert UYVY OpenCV format back to NPP YUV422 format for use by NPP
      for (int i = 0; i < img_size.width*img_size.height*2; i+=2){
        unsigned char v1 = buff_UYVY[i];
        unsigned char v2 = buff_UYVY[i+1];
        buff_UYVY[i+1] = v1;
        buff_UYVY[i] = v2;}
    
    
      //buff_UYVY >>> buff_RGB
      {
        err_cu_api = cudaMemcpy((void*)gpu_buff_UYVY,
                                (const void*)buff_UYVY,
                                img_size.width*img_size.height*2,
                                cudaMemcpyHostToDevice);
        std::cout << "cudaMemcpy4 : " << err_cu_api << std::endl;
    
    
        status = nppiYUV422ToRGB_8u_C2C3R(gpu_buff_UYVY,
                                                    img_size.width*2,
                                                    gpu_buff_RGB,
                                                    img_size.width*3,
                                                    roi);
        std::cout << "NppStatus : " << status << std::endl;
    
        err_cu_api = cudaMemcpy((void*) buff_RGB,
                                (const void*)gpu_buff_RGB,
                                img_size.width*img_size.height*3,
                                cudaMemcpyDeviceToHost);
        std::cout << "cudaMemcpy4 : " << err_cu_api << std::endl;
      }
    
      cv::Mat mat_rgb(img_size, CV_8UC3, buff_RGB);
    //cv::cvtColor(mat_rgb, mat_rgb, cv::COLOR_RGB2BGR);
    
      std::cout << "depth : " << mat_rgb.depth() << std::endl;
      std::cout << "channels : " << mat_rgb.channels() << std::endl;
      std::cout << "elemSize : " << mat_rgb.elemSize() << std::endl;
      std::cout << "step1 : " << mat_rgb.step1() << std::endl;
      std::cout << "type : " << mat_rgb.type() << std::endl;
    
      try {
        cv::imshow("RGB Image from GPU", mat_rgb);
        cv::imwrite("mat_rgb.bmp", mat_rgb);
        cv::imwrite("mat_rgb.jpg", mat_rgb);
      } catch( cv::Exception& e ) {
        const char* err_msg = e.what();
        std::cout << "exception caught #2: " << err_msg << std::endl;
      }
    
    //  cv::waitKey(0);
    
      delete[] buff_UYVY;
      delete[] buff_RGB;
    
      return 0;
    }
    (base) [bob@localhost misc]$ nvcc -o t30 t30.cu -lnppicc -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc
    (base) [bob@localhost misc]$ ./t30
    cudaMalloc1 : 0
    cudaMalloc3 : 0
    cudaMemcpy2 : 0
    NppStatus : 0
    cudaMemcpy4 : 0
    cudaMemcpy4 : 0
    NppStatus : 0
    cudaMemcpy4 : 0
    depth : 0
    channels : 3
    elemSize : 3
    step1 : 5760
    type : 16
    

    The output files are like this:

    mat_rgb.png:

    enter image description here

    mat_bgr.png:

    enter image description here

    The primary difference is the RGB <-> BGR mismatch that you already know about.

    For the remaining color differences, it's also possible that the YUV colorspace for OpenCV doesn't quite match the colorspace for NPP. There are alternative YUV functions you can try in NPP if you wish, such as nppiCbYCr422ToBGR_709HDTV_8u_C2C3R.