The code below causes the following error when compiled with `nvcc eigen_complex.cu -I [path to eigen]`,
where nvcc is at version 12.4 and Eigen is at the latest version.
Is there a way for me to define the `log2` function for `cuda::std::complex`?
/tmp/eigen/Eigen/src/Core/NumTraits.h(35): error: no instance of overloaded function "log2" matches the argument list
argument types are: (cuda::std::__4::complex<float>)
return int(ceil(-log2(NumTraits<Real>::epsilon())));
#include <iostream>
#include <math.h>
#include <cuda/std/complex>
#include <Eigen/Core>
#include <Eigen/Dense>
// Fixed-size 2x2 Eigen matrix whose scalar type is cuda::std::complex<float>.
using m2x2 = Eigen::Matrix<cuda::std::complex<float>, 2, 2>;
// Element-wise in-place sum of two arrays of 2x2 matrices: y[i] = x[i] + y[i]
// for every i in [0, n).
//
// Parameters:
//   n - number of matrices in each array
//   x - first operand; read only by this kernel
//   y - second operand; overwritten with the sum
//
// Launch layout: 1D grid of 1D blocks, one thread per matrix. The grid may
// contain more than n threads; the surplus threads return without touching
// memory.
__global__ void add(int n, m2x2 *x, m2x2 *y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard the grid tail: the launch rounds the block count up, so the last
  // block can hold thread indices >= n.
  if (i < n) {
    y[i] = x[i] + y[i];
  }
}
// Fills two unified-memory arrays of N 2x2 complex matrices, adds them on the
// GPU with the `add` kernel, and prints element 4 of x before the launch and
// element 4 of y after it.
//
// Returns 0 on success, 1 if any CUDA runtime call or the kernel launch fails
// (the failing call is reported on stderr).
int main(void) {
  const int N = 1 << 10;  // 1024 matrices per array
  m2x2 *x = nullptr, *y = nullptr;

  // Reports a failed CUDA runtime call on stderr; returns true on success.
  auto ok = [](cudaError_t status, const char *what) -> bool {
    if (status == cudaSuccess) {
      return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
  };

  // Allocate Unified Memory – accessible from CPU or GPU
  if (!ok(cudaMallocManaged(&x, N * sizeof(m2x2)), "cudaMallocManaged(x)")) {
    return 1;
  }
  if (!ok(cudaMallocManaged(&y, N * sizeof(m2x2)), "cudaMallocManaged(y)")) {
    cudaFree(x);
    return 1;
  }

  // initialize x and y arrays on the host: every entry of x[i] is 1+2i and
  // every entry of y[i] is 3+2i
  for (int i = 0; i < N; i++) {
    m2x2 &temp1 = x[i];
    m2x2 &temp2 = y[i];
    for (int r = 0; r < 2; r++) {
      for (int c = 0; c < 2; c++) {
        temp1(r, c) = {1.0f, 2.0f};
        temp2(r, c) = {3.0f, 2.0f};
      }
    }
  }
  std::cout << x[4] << std::endl;

  // Run kernel on all N elements on the GPU, one thread per matrix
  int blockSize = 256;
  int numBlocks = (N + blockSize - 1) / blockSize;  // ceil-div
  add<<<numBlocks, blockSize>>>(N, x, y);

  // A launch does not return a status itself: cudaGetLastError() reports a bad
  // launch configuration, and the synchronize call both waits for the GPU to
  // finish and reports faults raised while the kernel was running.
  bool success = ok(cudaGetLastError(), "add kernel launch") &&
                 ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

  // Check the result (every entry of y[4] should be (4,4)); skip the read if
  // the kernel did not complete.
  if (success) {
    std::cout << y[4] << std::endl;
  }

  // Free memory (attempted even after a failure so both buffers are released)
  success = ok(cudaFree(x), "cudaFree(x)") && success;
  success = ok(cudaFree(y), "cudaFree(y)") && success;
  return success ? 0 : 1;
}
On the Eigen Discord, someone informed me that I could use `std::complex` if I compile with `--expt-relaxed-constexpr`.
I just finished testing it, and it seems to work.