c++ cuda eigen complex-numbers

cuda::std::complex in Eigen::Matrix causing error


The code below produces the following error when compiled with `nvcc eigen_complex.cu -I [path to eigen]`, using nvcc version 12.4 and the latest version of Eigen. Is there a way for me to define the `log2` function for `cuda::std::complex`?

/tmp/eigen/Eigen/src/Core/NumTraits.h(35): error: no instance of overloaded function "log2" matches the argument list
            argument types are: (cuda::std::__4::complex<float>)
      return int(ceil(-log2(NumTraits<Real>::epsilon())));

#include <iostream>
#include <math.h>

#include <cuda/std/complex>

#include <Eigen/Core>
#include <Eigen/Dense>

// 2x2 fixed-size Eigen matrix whose entries are cuda::std::complex<float>.
typedef Eigen::Matrix<cuda::std::complex<float>, 2, 2> m2x2;

// Kernel: element-wise matrix addition, y[i] = x[i] + y[i] for i in [0, n).
//
// Each thread handles at most one matrix, selected by its flat 1D global
// thread index (blockIdx.x * blockDim.x + threadIdx.x), so the launch must
// supply at least n threads in total along x. Uses no shared memory.
//
//   n - number of matrices in each array
//   x - first operand (read only), at least n elements
//   y - second operand, overwritten with the sum, at least n elements
__global__ void add(int n, m2x2 *x, m2x2 *y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // The grid is normally rounded up to whole blocks, so threads past the end
  // of the arrays must do nothing instead of touching out-of-bounds memory.
  if (i < n) {
    y[i] = x[i] + y[i];
  }
}

// Fills two managed arrays of 2x2 complex matrices, adds them on the GPU with
// the `add` kernel, and prints element 4 of x before and of y after the launch.
// Returns 0 on success, 1 if any CUDA runtime call or the kernel launch fails.
int main(void) {

  const int N = 1 << 10;
  // Start as null so the cleanup path is safe even if an allocation fails.
  m2x2 *x = nullptr, *y = nullptr;

  // Reports a failed CUDA runtime call on stderr; returns true on success.
  auto ok = [](cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
      return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
  };

  // Early-exit helper: releases both buffers (cudaFree on a null pointer does
  // nothing) and hands back the exit code.
  auto finish = [&x, &y](int code) {
    cudaFree(x);
    cudaFree(y);
    return code;
  };

  // Allocate Unified Memory – accessible from CPU or GPU
  if (!ok(cudaMallocManaged(&x, N * sizeof(m2x2)), "cudaMallocManaged(x)") ||
      !ok(cudaMallocManaged(&y, N * sizeof(m2x2)), "cudaMallocManaged(y)")) {
    return finish(1);
  }

  // initialize x and y arrays on the host:
  // every entry of x[i] is (1,2) and every entry of y[i] is (3,2)
  for (int i = 0; i < N; i++) {
    m2x2 &temp1 = x[i];
    m2x2 &temp2 = y[i];
    for (int r = 0; r < 2; r++) {
      for (int c = 0; c < 2; c++) {
        temp1(r, c) = {1.0f, 2.0f};
        temp2(r, c) = {3.0f, 2.0f};
      }
    }
  }
  std::cout << x[4] << std::endl;

  // Run kernel on the N (= 1024) matrices on the GPU; the block count is
  // rounded up so that numBlocks * blockSize >= N.
  int blockSize = 256;
  int numBlocks = (N + blockSize - 1) / blockSize;
  add<<<numBlocks, blockSize>>>(N, x, y);

  // A kernel launch returns nothing, so launch-configuration errors have to be
  // fetched explicitly.
  if (!ok(cudaGetLastError(), "add kernel launch")) {
    return finish(1);
  }

  // Wait for GPU to finish before accessing on host; errors raised while the
  // kernel was running are reported by this call.
  if (!ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
    return finish(1);
  }

  // Check the result: every entry should be (1+3, 2+2) = (4,4)
  std::cout << y[4] << std::endl;

  // Free memory
  const bool freedX = ok(cudaFree(x), "cudaFree(x)");
  const bool freedY = ok(cudaFree(y), "cudaFree(y)");

  return (freedX && freedY) ? 0 : 1;
}

Solution

  • On the Eigen Discord, someone informed me that I could use `std::complex` instead of `cuda::std::complex`, provided I compile with the nvcc flag `--expt-relaxed-constexpr`. I just finished testing it, and it seems to work.