I seem to have an issue with the function cudaMemcpyToArray. I have the following commands:
float *h_data = new float[bmp.width * bmp.height];
...
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray *cuArray;
cudaMallocArray(&cuArray, &channelDesc, bmp.width, bmp.height);
cudaMemcpyToArray(cuArray, 0, 0, h_data, bmp.width * bmp.height, cudaMemcpyHostToDevice);
As far as I understand, this should give me a 2D array in cuArray that has dimensions bmp.width by bmp.height from the data in h_data, which is a 1D array with dimensions bmp.width * bmp.height. Unfortunately, it just seg-faults on the last command. Am I doing something horribly wrong?
I think @lmortenson was on the right track, but we don't multiply width and height by sizeof(float)
, just one of them.
bmp.width
and bmp.height
parameters conform to the limits specified here under Valid extents
. These extents are in elements, not bytes.width*height*sizeof(float)
I created a simple reproducer based on your code and was able to reproduce the seg fault. The following code was my adaptation with the errors fixed, I believe:
#include <stdio.h>
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
int main(){
int width = 256;
int height = 256;
float *h_data = new float[width * height];
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray *cArray;
cudaMallocArray(&cArray, &channelDesc, width, height, cudaArrayDefault);
cudaCheckErrors("cudaMallocArray");
cudaMemcpyToArray(cArray, 0, 0, h_data, width*height*sizeof(float), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpyToArray");
return 0;
}