c++ · opengl · cuda · libtorch

Weird behavior from CUDA (Libtorch) and OpenGL interop


I am trying to write functions that transform an OpenGL texture into a PyTorch tensor and back in a C++ app. To test that it works, I added 128 to every element of the tensor to brighten the image, then rendered the resulting texture on a quad. It mostly works, but I'm seeing a weird behavior in which part of the texture is unaffected.

This is the original texture, and this is the texture after adding 128 to every element of the tensor. Note that roughly a quarter of the image is not affected by this operation.

These are the relevant sections of code. textureColorbuffer uses the format GL_RGB (which, if I understood correctly, stores 8 bits per channel). This is where I call the functions and add to the tensor:

// Register the color attachment with CUDA, round-trip it through a tensor,
// and write the modified pixels back into the texture
cudaGraphicsGLRegisterImage(&cudaResource, textureColorbuffer, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone);
torch::Tensor tensor = resourceToTensor(cudaResource, WIDTH, HEIGHT);
tensor = tensor + static_cast<unsigned char>(128); // uint8 addition wraps modulo 256
tensorToResource(tensor, cudaResource, WIDTH, HEIGHT);

And these are the used functions:

torch::Tensor resourceToTensor(cudaGraphicsResource* cudaResource, int width, int height) {
    // Map the registered resource and grab the CUDA array backing the texture
    CUDA_CHECK_ERROR(cudaGraphicsMapResources(1, &cudaResource, 0));
    cudaArray* textureArray;
    CUDA_CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&textureArray, cudaResource, 0, 0));

    // Stage the texture contents in linear device memory (3 bytes per pixel)
    unsigned char* devicePtr;
    size_t size = width * height * 3 * sizeof(unsigned char);
    CUDA_CHECK_ERROR(cudaMalloc(&devicePtr, size));

    CUDA_CHECK_ERROR(cudaMemcpyFromArray(devicePtr, textureArray, 0, 0, size, cudaMemcpyDeviceToDevice));
    auto options = torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCUDA);
    torch::Tensor tensor = torch::from_blob(devicePtr, { height, width, 3 }, options);

    CUDA_CHECK_ERROR(cudaGraphicsUnmapResources(1, &cudaResource, 0));

    // Clone so the tensor owns its storage before the staging buffer is freed
    torch::Tensor clonedTensor = tensor.clone();

    CUDA_CHECK_ERROR(cudaFree(devicePtr));
    return clonedTensor;
}

void tensorToResource(torch::Tensor tensor, cudaGraphicsResource* cudaResource, int width, int height) {
    tensor = tensor.to(torch::kCUDA); // make sure the data lives on the GPU

    CUDA_CHECK_ERROR(cudaGraphicsMapResources(1, &cudaResource, 0));
    cudaArray* textureArray;
    CUDA_CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&textureArray, cudaResource, 0, 0));

    // Copy the tensor's bytes back into the texture's backing array
    const unsigned char* devicePtr = tensor.data_ptr<unsigned char>();
    size_t size = width * height * 3 * sizeof(unsigned char);
    CUDA_CHECK_ERROR(cudaMemcpyToArray(textureArray, 0, 0, devicePtr, size, cudaMemcpyDeviceToDevice));

    CUDA_CHECK_ERROR(cudaGraphicsUnmapResources(1, &cudaResource, 0));
}

Does anybody know what could be the cause of this? Did I make a mistake with the sizes of the buffers and arrays?


Solution

  • CUDA arrays do not support 3-byte (24-bit) pixel formats, so a texture registered with cudaGraphicsGLRegisterImage is backed by a 4-component (RGBA) array. Try

    size_t size = width * height * 4 * sizeof(unsigned char);

    Since width * height * 3 bytes is exactly three quarters of the real width * height * 4 array, your copies only cover the first three quarters of the image, which matches the untouched region you see. The documentation for cudaGraphicsGLRegisterImage lists the supported image formats, and 3-component formats such as GL_RGB are not among them.
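
    For completeness, here is a minimal sketch of what the corrected round trip could look like. It assumes the colorbuffer is reallocated with a GL_RGBA8 internal format so the texture really does hold 4 bytes per pixel, and it reuses the question's names and CUDA_CHECK_ERROR macro; treat it as an illustration rather than a drop-in replacement. (cudaMemcpyFromArray is deprecated in newer CUDA releases; cudaMemcpy2DFromArray is the modern equivalent.)

        // Assumption: the colorbuffer allocation is changed from GL_RGB to GL_RGBA8
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, WIDTH, HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

        torch::Tensor resourceToTensor(cudaGraphicsResource* cudaResource, int width, int height) {
            CUDA_CHECK_ERROR(cudaGraphicsMapResources(1, &cudaResource, 0));
            cudaArray* textureArray;
            CUDA_CHECK_ERROR(cudaGraphicsSubResourceGetMappedArray(&textureArray, cudaResource, 0, 0));

            unsigned char* devicePtr;
            size_t size = width * height * 4 * sizeof(unsigned char); // RGBA: 4 bytes per pixel
            CUDA_CHECK_ERROR(cudaMalloc(&devicePtr, size));
            CUDA_CHECK_ERROR(cudaMemcpyFromArray(devicePtr, textureArray, 0, 0, size, cudaMemcpyDeviceToDevice));

            auto options = torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCUDA);
            // The last dimension is now 4; clone so the tensor owns its storage
            torch::Tensor tensor = torch::from_blob(devicePtr, { height, width, 4 }, options).clone();

            CUDA_CHECK_ERROR(cudaGraphicsUnmapResources(1, &cudaResource, 0));
            CUDA_CHECK_ERROR(cudaFree(devicePtr));
            return tensor;
        }

    tensorToResource changes the same way: use width * height * 4 for the size and keep the tensor shaped { height, width, 4 }. If you want to brighten only the color channels and leave alpha alone, add to a slice instead of the whole tensor, e.g. tensor.slice(2, 0, 3) += 128;.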