I have a similar need for this question. The only difference is, I need to do this on a cuArray (because I want to use cudaAddressModeWrap
), instead of a normal device storage.
So, I tried this:
$ cat doubletexture.cu
#include <vector>
#include <iterator>
#include <iostream>
#include <cstdio>
static __inline__ __device__ double fetch_double(uint2 p){
return __hiloint2double(p.y, p.x);
}
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void my_print(cudaTextureObject_t texObject)
{
uint2 rval = tex1Dfetch<uint2>(texObject, 0.4);
double dval = fetch_double(rval);
printf("%f\n", dval);
}
int main()
{
double i = 0.35;
int numel = 10;
std::vector<double> h_data(numel, i);
double* d_data;
cudaMalloc(&d_data,numel*sizeof(double));
cudaMemcpy((void*)d_data, &h_data[0],
numel*sizeof(double), cudaMemcpyHostToDevice);
cudaTextureDesc td{};
td.filterMode = cudaFilterModePoint;
td.addressMode[0] = cudaAddressModeClamp;
td.readMode = cudaReadModeElementType;
cudaArray_t cArray;
cudaChannelFormatDesc channelDesc =
cudaCreateChannelDesc(32,32,0,0,cudaChannelFormatKindUnsigned);
cudaMallocArray(&cArray,&channelDesc,numel,0);
cudaResourceDesc resDesc;
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = cArray;
cudaTextureObject_t texObject;
gpuErrchk(cudaCreateTextureObject(&texObject, &resDesc, &td, NULL));
cudaMemcpyToArray(cArray,0,0,d_data,numel*sizeof(double),
cudaMemcpyDeviceToDevice);
my_print<<<1,1>>>(texObject);
gpuErrchk(cudaDeviceSynchronize());
return 0;
}
I have tried using different address modes, or filter modes, but the result is always zero. Where is it wrong?
… only difference is, I need to do this on a cuArray
Then you need to use a different texture fetch API within the kernel. tex1Dfetch
is used for linear memory based texture reads. tex1D
is used for array based texture reads.
As indicated in comments, if you make that change in the kernel, the code should work correctly.