c opengl cuda jcuda

Draw image from vertex buffer object generated with CUDA using OpenGL


I am using CUDA to generate this ABGR output image. The image in question is stored in a uchar4 array. Each element of the array represents the color of each pixel in the image. Obviously, this output array is a 2D image but it is allocated in CUDA as a linear memory of interleaved bytes.

I know that CUDA can easily map this array to an OpenGL Vertex Buffer Object. My question is, assuming that I have the RGB value of every pixel in an image, along with the width and height of the image, how can I draw this image to screen using OpenGL?
I know that some kind of shader must be involved but since my knowledge is very little, I have no idea how a shader can use the color of each pixel, but map it to correct screen pixels.

I know I should increase my knowledge in OpenGL, but this seems like a trivial task. If there is an easy way for me to draw this image, I'd rather not spend much time learning OpenGL.


Solution

  • I finally figured out an easy way to do what I wanted. Unfortunately, I did not know about the existence of the sample that Robert was talking about on NVIDIA's website.

    Long story short, the easiest way to draw the image was to define a Pixel Buffer Object in OpenGL, register the buffer with CUDA, and pass it as an output array of uchar4 to the CUDA kernel. Here is some quick pseudo-code based on JOGL and JCUDA that shows the steps involved. Most of the code was obtained from the sample on NVIDIA's website:

    1) Creating the OpenGL buffers

    // Step 1: allocate an OpenGL buffer object sized for the image and register it with CUDA.
    GL2 gl = drawable.getGL().getGL2();

    // glGenBuffers writes the generated buffer name into this one-element array.
    int[] buffer = new int[1];
    gl.glGenBuffers(1, IntBuffer.wrap(buffer));
    glBuffer = buffer[0];

    // Allocate the storage: 4 bytes per pixel (presumably one uchar4 per pixel, as the
    // CUDA kernel writes). Passing null means no data is pre-loaded into the buffer.
    gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, glBuffer);
    gl.glBufferData(GL2.GL_ARRAY_BUFFER, imageWidth * imageHeight * 4, (Buffer)null, GL2.GL_DYNAMIC_DRAW);
    gl.glBindBuffer(GL2.GL_ARRAY_BUFFER, 0);

    // The bufferResource is of type CUgraphicsResource and is defined as a class field
    this.bufferResource = new CUgraphicsResource();

    // Register the buffer in CUDA. Registration takes a *register* flag
    // (CUgraphicsRegisterFlags), not a map-resource flag; NONE has the same numeric
    // value (0) in both families, so behaviour is unchanged, but this is the constant
    // the API documents. Fully qualified so no additional import is required.
    cuGraphicsGLRegisterBuffer(bufferResource, glBuffer, jcuda.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_NONE);
    

    2) Initialize the texture and set texture parameters

    // Step 2: create the 2D texture that will later receive the CUDA-generated pixels.
    GL2 gl = drawable.getGL().getGL2();

    // Ask OpenGL for a single texture name and keep it in the glTexture field.
    int[] textureNames = new int[1];
    gl.glGenTextures(1, IntBuffer.wrap(textureNames));
    this.glTexture = textureNames[0];

    gl.glBindTexture(GL2.GL_TEXTURE_2D, glTexture);

    // Reserve RGBA8 storage for the full image; the null data pointer means
    // no pixels are uploaded at this point.
    gl.glTexImage2D(GL2.GL_TEXTURE_2D, 0, GL2.GL_RGBA8, imageWidth, imageHeight, 0, GL2.GL_BGRA, GL2.GL_UNSIGNED_BYTE, (Buffer)null);

    // Use linear filtering both when magnifying and when minifying.
    gl.glTexParameteri(GL2.GL_TEXTURE_2D, GL2.GL_TEXTURE_MAG_FILTER, GL2.GL_LINEAR);
    gl.glTexParameteri(GL2.GL_TEXTURE_2D, GL2.GL_TEXTURE_MIN_FILTER, GL2.GL_LINEAR);

    // Leave no texture bound once setup is finished.
    gl.glBindTexture(GL2.GL_TEXTURE_2D, 0);
    

    3) Run the CUDA kernel and display the results in OpenGL's display loop.

    // Step 3 (executed in the display callback): let CUDA fill the buffer, copy the
    // buffer into the texture, then draw a textured quad.
    // The method defined below is runCuda(GLAutoDrawable), so call it by that name
    // and pass the drawable.
    this.runCuda(drawable);

    GL2 gl = drawable.getGL().getGL2();

    // While a buffer is bound to GL_PIXEL_UNPACK_BUFFER, glTexSubImage2D takes its
    // pixels from that buffer and the last argument is an offset into it.
    gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, glBuffer);

    gl.glBindTexture(GL2.GL_TEXTURE_2D, glTexture);
    gl.glTexSubImage2D(GL2.GL_TEXTURE_2D, 0, 0, 0,
                    imageWidth, imageHeight,
                    GL2.GL_RGBA, GL2.GL_UNSIGNED_BYTE, 0); //The last argument must be ZERO! NOT NULL! :-)

    // Unbind the pixel buffers so later pixel operations use client memory again.
    gl.glBindBuffer(GL2.GL_PIXEL_PACK_BUFFER, 0);
    gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, 0);

    // glTexture is still bound from the upload above, so it is not re-bound here.
    gl.glEnable(GL2.GL_TEXTURE_2D);
    gl.glDisable(GL2.GL_DEPTH_TEST);
    gl.glDisable(GL2.GL_LIGHTING);
    // GL_REPLACE: the texel colour is used as-is for the fragment.
    gl.glTexEnvf(GL2.GL_TEXTURE_ENV, GL2.GL_TEXTURE_ENV_MODE, GL2.GL_REPLACE);

    // Save the current projection and switch to an orthographic [-1, 1] cube.
    gl.glMatrixMode(GL2.GL_PROJECTION);
    gl.glPushMatrix();
    gl.glLoadIdentity();
    gl.glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

    gl.glMatrixMode(GL2.GL_MODELVIEW);
    gl.glLoadIdentity();

    gl.glViewport(0, 0, imageWidth, imageHeight);

    // Quad covering the whole viewport. Texture t = 0 is mapped to the top edge
    // (y = +1) and t = 1 to the bottom edge (y = -1).
    gl.glBegin(GL2.GL_QUADS);
        gl.glTexCoord2f(0.0f, 1.0f);
        gl.glVertex2f(-1.0f, -1.0f);

        gl.glTexCoord2f(1.0f, 1.0f);
        gl.glVertex2f(1.0f, -1.0f);

        gl.glTexCoord2f(1.0f, 0.0f);
        gl.glVertex2f(1.0f, 1.0f);

        gl.glTexCoord2f(0.0f, 0.0f);
        gl.glVertex2f(-1.0f, 1.0f);
    gl.glEnd();

    // Restore the projection matrix saved above.
    gl.glMatrixMode(GL2.GL_PROJECTION);
    gl.glPopMatrix();

    gl.glDisable(GL2.GL_TEXTURE_2D);
    

    3.5) The CUDA call:

    /**
     * Maps the OpenGL buffer registered as {@code bufferResource} into CUDA, launches
     * the kernel {@code function} so that it can write into that buffer, waits for the
     * context to finish, and unmaps the buffer again.
     *
     * The kernel is launched on a one-dimensional grid of
     * ceil(imageWidth * imageHeight / DESC_BLOCK_SIZE) blocks with DESC_BLOCK_SIZE
     * threads each.
     *
     * NOTE(review): the kernel-parameter setup is elided in this snippet and must be
     * filled in by the reader. Return codes of the driver calls are not inspected here.
     *
     * @param drawable the current GL drawable; not referenced in the visible body
     */
    public void runCuda(GLAutoDrawable drawable) {
    
        // A new device-pointer wrapper is created on every call and stored in the devOutput field.
        devOutput = new CUdeviceptr();
        // Map the OpenGL buffer to a resource and then obtain a CUDA pointer to that resource
        cuGraphicsMapResources(1, new CUgraphicsResource[]{bufferResource}, null);
        // The long[1] is a throwaway out-parameter (presumably the mapped size); its value is discarded.
        cuGraphicsResourceGetMappedPointer(devOutput, new long[1], bufferResource);
    
        // Setup the kernel parameters making sure that the devOutput pointer is passed to the kernel
        Pointer kernelParams = 
                                .
                                .
                                .
                                .
    
        // Round up so the grid supplies at least imageWidth * imageHeight threads in total.
        int gridSize = (int) Math.ceil(imageWidth * imageHeight / (double)DESC_BLOCK_SIZE);
    
        cuLaunchKernel(function,
                gridSize, 1, 1,
                DESC_BLOCK_SIZE, 1, 1,
                0, null,
                kernelParams, null);
        // Wait for the context's outstanding work to finish before handing the buffer back to OpenGL.
        cuCtxSynchronize();
    
        // Unmap the buffer so that it can be used in OpenGL
        cuGraphicsUnmapResources(1, new CUgraphicsResource[]{bufferResource}, null);
    }
    

    PS: I thank Robert for providing the link to the sample. I also thank the people who downvoted my question without any useful feedback!