The following test.cu
program
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <iostream>
using HOST_TYPE=int32_t;
using DEVICE_TYPE=int;
template <typename T>
struct Cast {
__host__ __device__ T operator()(HOST_TYPE i) const {
return static_cast<T>(i);
}
};
int main() {
// Initialize host data
thrust::host_vector<HOST_TYPE> const h_vec{1, 2, 3, 4, 5};
// Allocate space on the device
thrust::device_vector<DEVICE_TYPE> device_data(h_vec.size());
// Copy data from host to device
//thrust::copy(h_vec.cbegin(), h_vec.cend(), device_data.begin()); // this works
thrust::transform(h_vec.cbegin(), h_vec.cend(), device_data.begin(), Cast<DEVICE_TYPE>{});
// Copy back to host to check
thrust::host_vector<DEVICE_TYPE> host_data_copy = device_data;
for (DEVICE_TYPE val : host_data_copy) {
std::cout << val << " ";
}
std::cout << std::endl;
return 0;
}
causes
$ nvcc test.cu
$ ./a.out
terminate called after throwing an instance of 'thrust::system::system_error'
what(): parallel_for: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered
Aborted (core dumped)
This occurs at the line
thrust::transform(h_vec.cbegin(), h_vec.cend(), device_data.begin(), Cast<DEVICE_TYPE>{});
even though a similar thrust::copy()
runs fine:
thrust::copy(h_vec.cbegin(), h_vec.cend(), device_data.begin()); // this works
I wasn't able to find anything in the docs to say that thrust::transform()
shouldn't transform data between device and host. Did I miss this somewhere?
Using thrust::host
or thrust::device
execution policies did not help.
Version:
$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Nov__3_17:16:49_PDT_2023
Cuda compilation tools, release 12.3, V12.3.103
Build cuda_12.3.r12.3/compiler.33492891_0
Note: The practical application of this requires HOST_TYPE=char
but it was changed to HOST_TYPE=int32_t
for debugging/illustrative purposes, and for comparison with std::copy()
.
Please see the Transformations:
With the exception of
thrust::copy
, which can copy data between host and device, all iterator arguments to a Thrust algorithm should live in the same place: either all on the host or all on the device. When this requirement is violated the compiler will produce an error message.