cuda

Template __host__ __device__ calling host defined functions


While implementing CUDA code I often need utility functions that are called from both device and host code, so I declare them as __host__ __device__. This works fine, and any device/host incompatibilities can be handled with #ifdef __CUDA_ARCH__.
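
For example (clamp_index below is just a made-up illustration, not from my real code), such a utility can branch on __CUDA_ARCH__ like this:

#include <algorithm>

// Hypothetical helper showing the #ifdef __CUDA_ARCH__ pattern.
__host__ __device__
inline int clamp_index(int i, int n) {
#ifdef __CUDA_ARCH__
    // device compilation pass: clamp by hand, no host standard library
    return i < 0 ? 0 : (i >= n - 1 ? n - 1 : i);
#else
    // host compilation pass: free to use <algorithm>
    return std::min(std::max(i, 0), n - 1);
#endif
}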

Problems arise when the utility function is templated, e.g. by a functor type. If the template instance calls a __host__ function, I get this warning:

calling a __host__ function from a __host__ __device__ function is not allowed
      detected during instantiation of "int foo(const T &) [with T=HostObject]" 

The only solution I know of is to define the function twice: once for device and once for host code, under different names (I cannot overload on __host__ __device__). But this means code duplication, and every other __host__ __device__ function that calls it must also be defined twice (even more code duplication).
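
Just to illustrate how the duplication cascades (bar_* is a made-up name that assumes the foo_host/foo_device split shown below):

template <typename T>
__host__
int bar_host(const T &obj) { return foo_host(obj) + 1; }

template <typename T>
__device__
int bar_device(const T &obj) { return foo_device(obj) + 1; }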

Simplified example:

#include <cuda.h>
#include <iostream>

struct HostObject {
    __host__ 
    int value() const { return 42; }
};

struct DeviceObject {
    __device__ 
    int value() const { return 3; }
};

template <typename T> 
__host__ __device__ 
int foo(const T &obj) {
    return obj.value();
}

/* The workaround: duplicate foo() under two names.
template <typename T> 
__host__ 
int foo_host(const T &obj) {
    return obj.value();
}

template <typename T> 
__device__ 
int foo_device(const T &obj) {
    return obj.value();
}
*/

__global__ void kernel(int *data) {
    data[threadIdx.x] = foo(DeviceObject());  // device-side instantiation: fine
}

int main() {
    foo(HostObject());  // this host-side call triggers the warning

    int *data;
    cudaMalloc((void**)&data, sizeof(int) * 64);
    kernel<<<1, 64>>>(data);
    cudaDeviceSynchronize();
    cudaFree(data);
}

The warning is caused by the foo(HostObject()); call inside the main() function.

foo_host<> and foo_device<> are possible replacements for the problematic foo<>.

Is there a better solution? Can I prevent instantiation of foo() on the device side?


Solution

  • You cannot prevent instantiation of either half of a __host__ __device__ function template. If you instantiate the function by calling it on the host (device), the compiler will also instantiate the device (host) half.

    The best you can do for your use case, as of CUDA 7.0, is to suppress the warning with #pragma hd_warning_disable as in the following example, and to ensure that the function is never called incorrectly.

    #include <iostream>
    #include <cstdio>
    
    #pragma hd_warning_disable
    template<class Function>
    __host__ __device__
    void invoke(Function f)
    {
      f();
    }
    
    struct host_only
    {
      __host__
      void operator()()
      {
        std::cout << "host_only()" << std::endl;
      }
    };
    
    struct device_only
    {
      __device__
      void operator()()
      {
        printf("device_only(): thread %d\n", threadIdx.x);
      }
    };
    
    __global__
    void kernel()
    {
      // use from device with device functor
      invoke(device_only());
    
      // XXX error
      // invoke(host_only());
    }
    
    int main()
    {
      // use from host with host functor
      invoke(host_only());
    
      kernel<<<1,1>>>();
      cudaDeviceSynchronize();
    
      // XXX error
      // invoke(device_only());
    
      return 0;
    }