I have this code:
void my_function(double *image_vector, double *endmembers, double *abundanceVector, int it, int lines, int samples, int bands, int targets)
{
double *h_Num;
double *h_aux;
double *h_Den;
int lines_samples = lines*samples;
h_Num = (double*) malloc(lines_samples * targets * sizeof(double));
h_aux = (double*) malloc(lines_samples * bands * sizeof(double));
h_Den = (double*) malloc(lines_samples * targets * sizeof(double));
sycl::queue my_queue{sycl::default_selector{}};
std::cout << "Device: "
<< my_queue.get_device().get_info<sycl::info::device::name>()
<< std::endl;
// USM declaration
double* image_vector_usm = sycl::malloc_shared<double>(lines_samples*bands, my_queue);
double* endmembers_usm = sycl::malloc_shared<double>(targets*bands, my_queue);
double* abundanceVector_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue);
double* h_Num_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue);
double* h_aux_usm = sycl::malloc_shared<double>(lines_samples*bands, my_queue);
double* h_Den_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue);
auto nonTrans = oneapi::mkl::transpose::nontrans;
auto yesTrans = oneapi::mkl::transpose::trans;
int i,j;
// We copy the parameters values into the USM variables // Maybe the mistake is here?
std::memcpy(image_vector_usm, image_vector,sizeof(double) * lines_samples*bands);
std::memcpy(endmembers_usm, endmembers,sizeof(double) * targets*bands);
// Initialization
for(i=0; i<lines_samples*targets; i++)
abundanceVector_usm[i]=1;
double alpha = 1.0;
double beta = 0.0;
// Start of callings to dgemm()
oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, yesTrans, lines_samples, targets, bands, alpha, image_vector_usm,lines_samples, endmembers_usm, targets, beta, h_Num_usm, lines_samples);
my_queue.wait_and_throw();
for(i=0; i<it; i++)
{
oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, nonTrans, lines_samples, targets, bands, alpha, abundanceVector_usm, lines_samples, endmembers_usm, targets, beta, h_aux_usm, lines_samples);
my_queue.wait_and_throw();
oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, yesTrans, lines_samples, targets, bands, alpha,h_aux_usm, lines_samples, endmembers_usm, targets, beta, h_Den_usm, lines_samples);
my_queue.wait_and_throw();
my_queue.parallel_for(sycl::range<1> (lines_samples*targets), [=] (sycl::id<1> j){
abundanceVector_usm[j] = abundanceVector_usm[j]*(h_Num_usm[j]/h_Den_usm[j]);
}).wait();
}
free(h_Den);
free(h_Num);
free(h_aux);
// Free SYCL
free(image_vector_usm, my_queue);
free(endmembers_usm, my_queue);
free(abundanceVector_usm, my_queue);
free(h_Num_usm, my_queue);
free(h_aux_usm, my_queue);
free(h_Den_usm, my_queue);
}
This is the makefile. I borrowed it from a default oneMKL example called "matrix_mul_mkl" and adapted it to my file name. The makefile is called GNUmakefile:
# Makefile for GNU Make
# Builds the my_code executable with the DPC++ compiler and links it against
# oneMKL (ILP64 interface, sequential threading layer).

# `default`, `all` and `run` are aliases that only build my_code; none of them
# executes the binary.
default: run
all: run
run: my_code

# -DMKL_ILP64 selects 64-bit integer MKL interfaces and must be paired with
# the matching interface library (-lmkl_intel_ilp64) in MKL_LIBS.
MKL_COPTS = -DMKL_ILP64 -I"${MKLROOT}/include"
MKL_LIBS = -L${MKLROOT}/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl
DPCPP_OPTS = $(MKL_COPTS) -fsycl-device-code-split=per_kernel $(MKL_LIBS)

# RS_algorithm.cpp is also needed to build: it is sequential host code that
# just calls the function defined in my_code.cpp.
# Recipe lines below MUST start with a tab character, not spaces.
my_code: my_code.cpp RS_algorithm.cpp
	dpcpp $^ -o $@ $(DPCPP_OPTS)

clean:
	-rm -f my_code

.PHONY: clean run all default
I know that mixing up the ILP64 and LP64 libraries sometimes causes trouble, but the matrix_mul example mentioned above works, so that can't be the cause here, can it?
And this is what the execution returns:
Device: Intel whatever model...
Intel MKL ERROR: Parameter 11 was incorrect on entry to cblas_dgemm.
Segmentation fault.
I have put some prints right after each call to gemm() and run some tests; the first call seems to execute, but the second one does not.
I have tried and checked everything I can think of. What is wrong?
Thank you in advance!
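I found the solution. I was calling the row_major version of gemm, but the leading dimensions I pass (lines_samples, targets) follow the column-major convention, so for this code I had to call the column_major version (oneapi::mkl::blas::column_major::gemm) instead. Be careful which one you use!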