c++cuda

CUDA Julia example throws error: calling a __host__ function from a __device__ function is not allowed. Why?


Here is the code. If you compile it with nvcc, it gives this error:

calling a __host__ function("cuComplex::cuComplex(float, float)") from a __device__ function("cuComplex::operator *") is not allowed

Below is the code; you can download all the files from https://developer.nvidia.com/cuda-example (this is the Julia example). Why does it give this error? Any comments are appreciated. I didn't modify the code at all, just downloaded it from the link above; book.h and cpu_bitmap.h were added to my cuda-samples/Common directory.

#include "cuda-samples/Common/book.h"
#include "cuda-samples/Common/cpu_bitmap.h"

#define DIM 1000

// Minimal complex-number type used by the Julia-set iteration:
// r is the real part, i is the imaginary part.
struct cuComplex {
    float   r;
    float   i;
    // NOTE(review): this constructor has no execution-space specifier, so it
    // is host-only. The __device__ operators below (and julia()) construct
    // cuComplex objects in device code, which is what nvcc rejects with
    // "calling a __host__ function ... from a __device__ function".
    cuComplex( float a, float b ) : r(a), i(b)  {}
    // Squared magnitude r^2 + i^2 (no square root is taken).
    __device__ float magnitude2( void ) {
        return r * r + i * i;
    }
    // Complex product: (r + i*j)(a.r + a.i*j), returned as a new cuComplex.
    __device__ cuComplex operator*(const cuComplex& a) {
        return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
    }
    // Component-wise complex sum, returned as a new cuComplex.
    __device__ cuComplex operator+(const cuComplex& a) {
        return cuComplex(r+a.r, i+a.i);
    }
};

// Decide whether pixel (x, y) belongs to the Julia set for c = -0.8 + 0.156i.
//
// The pixel is mapped to a complex starting value by centring on DIM/2 and
// scaling by 1.5, then z = z*z + c is applied up to 200 times.
// Returns 0 as soon as the squared magnitude exceeds 1000 (treated as
// diverged), and 1 if all 200 iterations stay within that bound.
__device__ int julia( int x, int y ) {
    const float scale = 1.5f;
    const int   half  = DIM / 2;

    // Offsets from the image centre, normalised by half the image size.
    const float jx = scale * (float)(half - x) / half;
    const float jy = scale * (float)(half - y) / half;

    cuComplex c(-0.8f, 0.156f);
    cuComplex a(jx, jy);

    for (int step = 0; step < 200; ++step) {
        a = a * a + c;
        if (a.magnitude2() > 1000.0f) {
            return 0;
        }
    }

    return 1;
}

// Colours one pixel per block: blockIdx.x / blockIdx.y are used directly as
// the pixel coordinates and threadIdx is never read, so the launch is
// expected to use a grid matching the image dimensions.
// ptr is the output buffer with 4 bytes per pixel, rows gridDim.x pixels wide
// (presumably RGBA -- confirm against CPUBitmap).
__global__ void kernel( unsigned char *ptr ) {
    // pixel position comes straight from the block index
    const int col = blockIdx.x;
    const int row = blockIdx.y;
    const int offset = col + row * gridDim.x;

    // 1 if the point is in the set, 0 otherwise
    const int inSet = julia( col, row );

    // first byte is 255 for points in the set, next two are always 0,
    // last byte is always 255
    unsigned char *pixel = ptr + offset * 4;
    pixel[0] = 255 * inSet;
    pixel[1] = 0;
    pixel[2] = 0;
    pixel[3] = 255;
}

// globals needed by the update routine
// Bundle whose address main() passes to the CPUBitmap constructor.
struct DataBlock {
    // Set in main() to the buffer obtained from cudaMalloc.
    unsigned char   *dev_bitmap;
};

// Renders the Julia set on the GPU and displays it:
//   1. allocate a device buffer the size of the bitmap,
//   2. launch one block per pixel (DIM x DIM grid, 1 thread per block),
//   3. copy the result back into the host bitmap,
//   4. free the device buffer and show the image.
// Every CUDA runtime call is wrapped in HANDLE_ERROR.
int main( void ) {
    DataBlock   data;
    CPUBitmap bitmap( DIM, DIM, &data );
    unsigned char    *dev_bitmap;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_bitmap, bitmap.image_size() ) );
    data.dev_bitmap = dev_bitmap;

    dim3    grid(DIM,DIM);
    kernel<<<grid,1>>>( dev_bitmap );
    // A kernel launch returns no status of its own; a bad launch
    // configuration is only reported through cudaGetLastError(), so check it
    // here instead of letting it surface later as a confusing failure.
    HANDLE_ERROR( cudaGetLastError() );

    // Blocking copy: also waits for the kernel to finish before reading.
    HANDLE_ERROR( cudaMemcpy( bitmap.get_ptr(), dev_bitmap,
                              bitmap.image_size(),
                              cudaMemcpyDeviceToHost ) );

    HANDLE_ERROR( cudaFree( dev_bitmap ) );

    bitmap.display_and_exit();
}

Solution

  • The problem is that this constructor of class cuComplex:

    cuComplex( float a, float b ) : r(a), i(b)  {}
    

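    is only a host method (i.e. it can be called only from host code), but in the device function julia you attempt to call it when you create these objects (and the __device__ operators operator* and operator+ call it as well, which is the call nvcc reports in the error message):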

    cuComplex c(-0.8, 0.156);
    cuComplex a(jx, jy);
    

    In order to solve it, mark it with __device__, similarly to the other methods of the class:

    //vvvvvvvv--------------------------------------------
    __device__ cuComplex(float a, float b) : r(a), i(b) {}
    

    In CUDA programming, each method or function has an execution space property which determines in which context it can be invoked (the default, when there is no specifier, is host only).
    See more info about it in the CUDA documentation: Function Execution Space Specifiers.