c++cudanpp

Convert NV12 to BGR by NVIDIA Performance Primitives


I'm trying to convert NV12 image to BGR by npp, but in the final array i have zeroes.

int lumaStepBytes, chromaStepBytes;
int rgbStepBytes;
      
auto dpNV12LumaFrame = nppiMalloc_8u_C1(dec.GetWidth(), dec.GetHeight(), &lumaStepBytes);
auto dpNV12ChromaFrame = nppiMalloc_8u_C1(dec.GetWidth(), dec.GetChromaHeight(), &chromaStepBytes);
auto dpBGRFrame = nppiMalloc_8u_C3(dec.GetWidth(), dec.GetHeight(), &rgbStepBytes);
            
cudaMemcpy2D(dpNV12LumaFrame, lumaStepBytes, pFrame, dec.GetWidth(),
             dec.GetWidth(), dec.GetHeight(), cudaMemcpyKind::cudaMemcpyHostToDevice);
cudaMemcpy2D(dpNV12ChromaFrame, chromaStepBytes, pFrame + dec.GetLumaPlaneSize(), dec.GetWidth(),
             dec.GetWidth(), dec.GetChromaHeight(), cudaMemcpyKind::cudaMemcpyHostToDevice);

Npp8u *planesAddres[2];
planesAddres[0] = dpNV12LumaFrame;
planesAddres[1] = dpNV12ChromaFrame;

nppiNV12ToBGR_8u_P2C3R(planesAddres, lumaStepBytes,
                       dpBGRFrame, rgbStepBytes,
                       {dec.GetWidth(), dec.GetHeight()});

res.m_data.resize(dec.GetWidth() * dec.GetHeight() * 3);
cudaMemcpy2D(res.m_data.data(), dec.GetWidth(), dpBGRFrame, rgbStepBytes,
             dec.GetWidth(), dec.GetHeight(), cudaMemcpyKind::cudaMemcpyDeviceToHost);
nppiFree(dpBGRFrame);
nppiFree(dpNV12ChromaFrame);
nppiFree(dpNV12LumaFrame);

dec is a video decoder which gives pFrame in NV12 format and provide additional information about that, like offsets, dimensions, NV12 planes, etc. The same result I have if I use cu... and cuda... functions for allocating without alignment.

Do anybody have any ideas about the problem?


Solution

  • the problem went away when I changed the video card. GeForce 740 to 1080