I want to do the EVD wth this 4x4
covariance matrix:
cuDoubleComplex m_cov_[16] = {
make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
};
// EVD
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigensolver(m_cov_);
eigenValues_ = eigensolver.eigenvalues();
eigenVectors_ = eigensolver.eigenvectors();
By C++ Eigen
lib, the eigen values
are below:
{{x = -2.2573766836348074e-15, y = 0},
{x = -9.709294041296323e-16, y = 0},
{x = 8.9445808618868127e-17, y = 0},
{x = 10.280850897111305, y = 0}}
And the 4x4
eigen vectors
are below:
{{x = -0.3470929425161523, y = 0}, {x = -0.77713832029830621, y = -0}, {x = -0.15994536685820598, y = 0}, {x = 0.5, y = 0}},
{{x = -0.4597724303808105, y = 0.48181665117044714}, {x = 0.12469176895072034, y = -0.019363390950754053}, {x = -0.28597710463196591, y = 0.45689839613393701}, {x = -0.21684345356939669, y = 0.45053181535169839}},
{{x = -0.42256553981019185, y = -0.42733105533794741}, {x = 0.41437862159459787, y = 0.29068296724286291}, {x = 0.33214873805084277, y = 0.14932354148008731}, {x = 0.45697128218787925, y = 0.2029217761985285}},
{{x = -0.21707002501160269, y = 0.16642011333440535}, {x = -0.27240794554375036, y = 0.22298146715732753}, {x = 0.60350719516570406, y = -0.43247796708208042}, {x = -0.38102789443353835, y = 0.32375568514474662}}}
But with the same input, cuBLAS return the different results.
I use the cuBLAS EVD function below:
CHECK_CUSOLVER(cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, m_eigen_vec.data(), N, m_eigen_value.data(), d_work, lwork, devInfo));
And the eigen values and eigen vectors are below:
// Eigen Values
{-5.2180864461495171e-16,
-3.0110342183593702e-16,
2.8548936444323134e-16,
10.497946406463264}
// Eigen Vectors
{{x = -0.38208898741712083, y = 0.41581487741664563}, {x = 0.32043709404849968,
y = -0.4517568232420171}, {x = 0.21717632600175682, y = -0.36577789346231687}, {x = -0.33093161501032731,
y = 0.28959813033042575}, {x = -0.17547383350755327, y = -0.69642001620440552}, {x = -0.10241833368805221,
y = -0.43952076894648784}, {x = 0.047402059701005216, y = 0.14281594546174881}, {x = 0.13099303872894871,
y = 0.49065012752041015}, {x = -0.32475905997549959, y = 0.077154117638887132}, {x = -0.32253254381877083,
y = 0.2157036870035538}, {x = -0.50261510442239055, y = 0.29617238148252628}, {x = -0.58415934115419899,
y = 0.23757380765099248}, {x = -0.23214840790484073, y = 0}, {x = 0.58224779741288002, y = 0}, {x = -0.67532037122395228,
y = 0}, {x = 0.38863480971863701, y = 0}}
You can notice that the sorted eigen values
calc from Eigen Lib
and from cuBLAS
are the same (three ~0 values and one larger value) but the eigen vectors
are totally different.
I've wrote a lot of unit tests for each step, and I'm sure that the eigen values and vectors from Eigen lib
are correct, can someone tell me how to get the same answer with cuBLAS
EVD?
Lifting and modifying the framework that I modified here, the results from cusolver seem to check the equation A * V = V * D, where A is your input matrix in the question, V is the matrix of eigenvectors returned by cusolver, and D is the diagonal matrix of eigenvalues returned by cusolver:
# cat t247.cu
#include <iostream>
#include <vector>
#include <cuComplex.h>
#include <cusolverDn.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
const int N = 4;
cuDoubleComplex mc[N*N] = {
make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
};
// Helper function to print a complex matrix
void printMatrix(const cuDoubleComplex* A, int rows, int cols) {
for (int i = 0; i < cols; ++i) {
for (int j = 0; j < rows; ++j) {
std::cout << "(" << cuCreal(A[j * cols + i]) << ", " << cuCimag(A[j * cols + i]) << ") ";
}
std::cout << std::endl;
}
}
// Helper function to check if two complex numbers are approximately equal
bool isApproxEqual(cuDoubleComplex a, cuDoubleComplex b, double tol = 1e-6) {
return (fabs(cuCreal(a) - cuCreal(b)) < tol) && (fabs(cuCimag(a) - cuCimag(b)) < tol);
}
int main() {
// Define a 4x4 complex covariance matrix
#ifdef USE_TRANSPOSE
cuDoubleComplex *A = new cuDoubleComplex[N*N];
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
A[i*N+j] = mc[j*N+i];
#else
cuDoubleComplex *A = mc;
#endif
std::cout << "Original matrix A:" << std::endl;
printMatrix(A, N, N);
// cuBLAS and cuSOLVER setup
cusolverDnHandle_t cusolverH = nullptr;
cublasHandle_t cublasH = nullptr;
cudaStream_t stream = nullptr;
cusolverStatus_t csstat = cusolverDnCreate(&cusolverH);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cublasStatus_t cbstat = cublasCreate(&cublasH);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
cudaStreamCreate(&stream);
csstat = cusolverDnSetStream(cusolverH, stream);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cbstat = cublasSetStream(cublasH, stream);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// Device memory for matrix A, eigenvalues, and workspace
cuDoubleComplex* d_A = nullptr;
double* d_W = nullptr;
int* devInfo = nullptr;
int lwork = 0;
cuDoubleComplex* d_work = nullptr;
cudaMalloc((void**)&d_A, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_W, sizeof(double) * N);
cudaMalloc((void**)&devInfo, sizeof(int));
cudaMemcpy(d_A, A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyHostToDevice);
// Query working space
csstat = cusolverDnZheevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, &lwork);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork);
// Compute EVD
csstat = cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, d_work, lwork, devInfo);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
// Copy results back to host
cuDoubleComplex V[N * N];
double W[N];
cudaMemcpy(V, d_A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
cudaMemcpy(W, d_W, sizeof(double) * N, cudaMemcpyDeviceToHost);
int hinfo;
cudaMemcpy(&hinfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
if (hinfo) std::cout << "devInfo: " << hinfo << std::endl;
std::cout << "Eigenvalues (diagonal of D):" << std::endl;
for (int i = 0; i < N; ++i) {
std::cout << W[i] << " ";
}
std::cout << std::endl;
std::cout << "Eigenvectors (columns of V):" << std::endl;
printMatrix(V, N, N);
// Verify A * V = V * D using cuBLAS
cuDoubleComplex* d_V = nullptr;
cuDoubleComplex* d_D = nullptr;
cuDoubleComplex* d_DV = nullptr;
cuDoubleComplex* d_AV = nullptr;
cudaMalloc((void**)&d_V, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_D, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_DV, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_AV, sizeof(cuDoubleComplex) * N * N);
cudaMemset(d_D, 0, N*N*sizeof(cuDoubleComplex));
cuDoubleComplex *ev = new cuDoubleComplex[N];
for (int i = 0; i < N; i++) ev[i] = make_cuDoubleComplex(W[i], 0.0);
for (int i = 0; i < N; i++) cudaMemcpy(d_D+(i*(N+1)), ev+i, sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex beta = make_cuDoubleComplex(0.0, 0.0);
// Calculate V * D d_A now contains the eigenvectors, it is effectively V
cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_A, N, d_D, N, &beta, d_DV, N);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
cuDoubleComplex* d_nA = nullptr;
cudaMalloc(&d_nA, N*N*sizeof(cuDoubleComplex));
cudaMemcpy(d_nA, A, N*N*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
// Calculate A * V
cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_nA, N, d_A, N, &beta, d_AV, N);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// Copy results back to host
cuDoubleComplex AV[N * N];
cuDoubleComplex DV[N * N];
cudaMemcpy(AV, d_AV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
cudaMemcpy(DV, d_DV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
std::cout << "A * V:" << std::endl;
printMatrix(AV, N, N);
std::cout << "V * D:" << std::endl;
printMatrix(DV, N, N);
// Check if A * V is approximately equal to V * D
bool equal = true;
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
if (!isApproxEqual(AV[i * N + j], DV[i * N + j])) {
equal = false;
break;
}
}
}
if (equal) {
std::cout << "Verification successful: A * V = V * D" << std::endl;
} else {
std::cout << "Verification failed: A * V != V * D" << std::endl;
}
// Clean up
cudaFree(d_A);
cudaFree(d_W);
cudaFree(devInfo);
cudaFree(d_work);
cudaFree(d_V);
cudaFree(d_DV);
cudaFree(d_AV);
cusolverDnDestroy(cusolverH);
cublasDestroy(cublasH);
cudaStreamDestroy(stream);
return 0;
}
# nvcc -o t247 t247.cu -lcublas -lcusolver
# compute-sanitizer ./t247
========= COMPUTE-SANITIZER
Original matrix A:
(2.03012, 3.4235e-17) (1.03658, 2.10281) (2.7517, -0.950597) (-1.35016, 1.18152)
(1.03658, -2.10281) (2.70739, 8.73925e-17) (0.420388, -3.3356) (0.534434, 2.00179)
(2.7517, 0.950597) (0.420388, 3.3356) (4.17486, 2.01006e-17) (-2.38329, 0.96927)
(-1.35016, -1.18152) (0.534434, -2.00179) (-2.38329, -0.96927) (1.58558, -1.58779e-17)
Eigenvalues (diagonal of D):
-5.21809e-16 -3.01103e-16 2.85489e-16 10.4979
Eigenvectors (columns of V):
(-0.382089, 0.415815) (0.320437, -0.451757) (0.217176, -0.365778) (-0.330932, 0.289598)
(-0.175474, -0.69642) (-0.102418, -0.439521) (0.0474021, 0.142816) (0.130993, 0.49065)
(-0.324759, 0.0771541) (-0.322533, 0.215704) (-0.502615, 0.296172) (-0.584159, 0.237574)
(-0.232148, 0) (0.582248, 0) (-0.67532, 0) (0.388635, 0)
A * V:
(-7.00371e-17, 2.32465e-16) (-4.94086e-17, 4.01301e-16) (-5.28972e-17, 4.03915e-17) (-3.4741, 3.04019)
(2.63345e-16, 3.01536e-16) (3.27777e-16, 3.2118e-16) (-8.58426e-17, -4.60878e-16) (1.37516, 5.15082)
(-1.31867e-16, 5.0219e-16) (-2.23634e-16, 2.87755e-16) (4.18528e-16, -2.85843e-16) (-6.13247, 2.49404)
(1.07082e-16, -1.23345e-16) (1.49087e-16, -1.99645e-16) (7.87887e-17, -1.82635e-17) (4.07987, -2.48075e-16)
V * D:
(1.99377e-16, -2.16976e-16) (-9.64847e-17, 1.36026e-16) (6.20015e-17, -1.04426e-16) (-3.4741, 3.04019)
(9.15638e-17, 3.63398e-16) (3.08385e-17, 1.32341e-16) (1.35328e-17, 4.07724e-17) (1.37516, 5.15082)
(1.69462e-16, -4.02597e-17) (9.71157e-17, -6.49491e-17) (-1.43491e-16, 8.45541e-17) (-6.13247, 2.49404)
(1.21137e-16, 0) (-1.75317e-16, 0) (-1.92797e-16, 0) (4.07987, 0)
Verification successful: A * V = V * D
========= ERROR SUMMARY: 0 errors
#