cuda cuda-arrays

cudaMemcpyToArray


I seem to have an issue with the function cudaMemcpyToArray. I have the following commands:

// Host-side image buffer: one float per pixel, bmp.width * bmp.height elements.
float *h_data = new float[bmp.width * bmp.height];
...
// Single-channel 32-bit float channel format for the array.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray *cuArray;
// Width and height are passed here as element counts.
cudaMallocArray(&cuArray, &channelDesc, bmp.width, bmp.height);
// NOTE(review): the count argument below is an element count (width * height);
// cudaMemcpyToArray expects a size in bytes, so a sizeof(float) factor is missing.
cudaMemcpyToArray(cuArray, 0, 0, h_data, bmp.width * bmp.height, cudaMemcpyHostToDevice);

As far as I understand, this should give me a 2D array in cuArray that has dimensions bmp.width by bmp.height from the data in h_data, which is a 1D array with dimensions bmp.width * bmp.height. Unfortunately, it just seg-faults on the last command. Am I doing something horribly wrong?


Solution

  • I think @lmortenson was on the right track, but we don't multiply both width and height by sizeof(float) — only one of them, so that the factor is applied exactly once to the total size.

    1. You need to make sure that your bmp.width and bmp.height parameters conform to the limits specified here under Valid extents. These extents are in elements, not bytes.
    2. You need to pass width and height parameters to cudaMallocArray that are in elements, not bytes.
    3. You need to pass an overall size parameter to cudaMemcpyToArray that is in bytes, not elements; this would generally be of the form width*height*sizeof(float).

    I created a simple reproducer based on your code and was able to reproduce the seg fault. The following code was my adaptation with the errors fixed, I believe:

    #include <stdio.h>
    #include <stdlib.h>
    // Checks the CUDA runtime's last-error state. If it is not cudaSuccess,
    // prints msg, the CUDA error string and the source location to stderr,
    // then terminates the process with exit status 1.
    // Invoke it immediately after the CUDA call being checked, because
    // cudaGetLastError() reads and clears the runtime's error state.
    // The local is named without a double underscore: such identifiers are
    // reserved for the implementation in C++.
    #define cudaCheckErrors(msg) \
        do { \
            cudaError_t cuda_check_err_ = cudaGetLastError(); \
            if (cuda_check_err_ != cudaSuccess) { \
                fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                    msg, cudaGetErrorString(cuda_check_err_), \
                    __FILE__, __LINE__); \
                fprintf(stderr, "*** FAILED - ABORTING\n"); \
                exit(1); \
            } \
        } while (0)
    
    // Minimal reproducer: allocates a width x height single-channel float
    // cudaArray, copies a host buffer of the same size into it, then releases
    // both the device array and the host buffer. Returns 0 on success; any
    // CUDA error aborts through cudaCheckErrors.
    int main(){
      const int width = 256;
      const int height = 256;
      // Value-initialise (zero) the buffer so the copy does not read
      // indeterminate host memory.
      float *h_data = new float[width * height]();
      // Total copy size in bytes, computed in size_t to avoid int overflow
      // for larger images.
      const size_t bytes = sizeof(float) * width * height;
      cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
      cudaArray *cArray;
      // Array extents are given in elements, not bytes.
      cudaMallocArray(&cArray, &channelDesc, width, height, cudaArrayDefault);
      cudaCheckErrors("cudaMallocArray");
      // The count argument is in bytes, hence the sizeof(float) factor above.
      // NOTE(review): cudaMemcpyToArray is marked deprecated in recent CUDA
      // toolkits; cudaMemcpy2DToArray is the documented replacement.
      cudaMemcpyToArray(cArray, 0, 0, h_data, bytes, cudaMemcpyHostToDevice);
      cudaCheckErrors("cudaMemcpyToArray");
      // Release the device array and the host buffer.
      cudaFreeArray(cArray);
      cudaCheckErrors("cudaFreeArray");
      delete[] h_data;
      return 0;
    }