Tags: c++, parallel-processing, intel-mkl, sycl, cblas

Intel MKL ERROR: incorrect parameter when calling gemm()


I have this code:

void my_function(double *image_vector, double *endmembers, double *abundanceVector, int it, int lines, int samples, int bands, int targets)
{
    double *h_Num;
    double *h_aux;
    double *h_Den;
    int lines_samples = lines*samples;
        
    h_Num = (double*) malloc(lines_samples * targets * sizeof(double));
    h_aux = (double*) malloc(lines_samples * bands * sizeof(double));
    h_Den = (double*) malloc(lines_samples * targets * sizeof(double));

    sycl::queue my_queue{sycl::default_selector{}};

        std::cout << "Device: "
                  << my_queue.get_device().get_info<sycl::info::device::name>()
                  << std::endl;
    
    // USM declaration
    double* image_vector_usm = sycl::malloc_shared<double>(lines_samples*bands, my_queue);
    double* endmembers_usm = sycl::malloc_shared<double>(targets*bands, my_queue);
    double* abundanceVector_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue); 
    double* h_Num_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue);
    double* h_aux_usm = sycl::malloc_shared<double>(lines_samples*bands, my_queue);
    double* h_Den_usm = sycl::malloc_shared<double>(lines_samples*targets, my_queue);
    auto nonTrans = oneapi::mkl::transpose::nontrans;
    auto yesTrans = oneapi::mkl::transpose::trans;
    
    int i,j;
    
    // We copy the parameters values into the USM variables // Maybe the mistake is here?
    std::memcpy(image_vector_usm, image_vector,sizeof(double) * lines_samples*bands);
    std::memcpy(endmembers_usm, endmembers,sizeof(double) * targets*bands);
    
    // Initialization
    for(i=0; i<lines_samples*targets; i++)
        abundanceVector_usm[i]=1;

    double alpha = 1.0;
    double beta = 0.0;

    // Start of callings to dgemm()

      oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, yesTrans, lines_samples, targets, bands, alpha, image_vector_usm,lines_samples, endmembers_usm, targets, beta, h_Num_usm, lines_samples);

    my_queue.wait_and_throw();

    for(i=0; i<it; i++)
    { 
        oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, nonTrans, lines_samples, targets, bands, alpha, abundanceVector_usm, lines_samples, endmembers_usm, targets, beta, h_aux_usm, lines_samples);
        
        my_queue.wait_and_throw();

        oneapi::mkl::blas::row_major::gemm(my_queue, nonTrans, yesTrans, lines_samples, targets, bands, alpha,h_aux_usm, lines_samples, endmembers_usm, targets, beta, h_Den_usm, lines_samples);

        my_queue.wait_and_throw();

        my_queue.parallel_for(sycl::range<1> (lines_samples*targets), [=] (sycl::id<1> j){
            abundanceVector_usm[j] = abundanceVector_usm[j]*(h_Num_usm[j]/h_Den_usm[j]);
        }).wait();
    }

    free(h_Den);
    free(h_Num);
    free(h_aux);
    
    // Free SYCL
    free(image_vector_usm, my_queue);
    free(endmembers_usm, my_queue);
    free(abundanceVector_usm, my_queue);
    free(h_Num_usm, my_queue);
    free(h_aux_usm, my_queue);
    free(h_Den_usm, my_queue);
}

This is the makefile. I borrowed it from a default oneMKL example called "matrix_mul_mkl" and adapted it to my file name. The file is named GNUmakefile:

# Makefile for GNU Make
#
# Builds my_code with the dpcpp compiler and links it against oneMKL using
# the ILP64 interface (-DMKL_ILP64, mkl_intel_ilp64) and the sequential
# threading layer (mkl_sequential).
# NOTE: recipe lines must start with a TAB character, not spaces.

default: run

all: run

# "run" only builds the binary; it does not execute it.
run: my_code

# Compile-time flags: select the ILP64 interface and add the MKL headers.
MKL_COPTS = -DMKL_ILP64  -I"${MKLROOT}/include"
# Link-time flags: MKL SYCL/ILP64/sequential/core libraries plus SYCL and OpenCL runtimes.
MKL_LIBS = -L${MKLROOT}/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl

DPCPP_OPTS = $(MKL_COPTS) -fsycl-device-code-split=per_kernel $(MKL_LIBS)

# This RS file is also needed to compile, nothing strange there I believe,
# completely sequential and just calls the function in my_code.
my_code: my_code.cpp RS_algorithm.cpp
	dpcpp $^ -o $@ $(DPCPP_OPTS)


clean:
	-rm -f my_code

.PHONY: clean run all default

I know that mixing the ILP64 and LP64 libraries can sometimes cause trouble, but the matrix_mul example mentioned above builds and runs correctly with these same flags, so I don't think that is the cause here.

And this is what the execution returns:

Device: Intel whatever model...
Intel MKL ERROR: Parameter 11 was incorrect on entry to cblas_dgemm.
Segmentation fault.

I have put some prints right under the calls to gemm() and done some tests; the first call seems to execute, but not the second one.

I have tried and checked everything, what is wrong?

Thank you in advance!


Solution

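  • I found the solution: I was calling the row_major version of gemm(), but the leading dimensions I pass (lines_samples, targets, lines_samples) describe column-major matrices, so I had to call the column_major version instead. In the row-major layout a transposed B must have ldb >= k (bands here), so passing targets is rejected whenever targets < bands, which is exactly the "Parameter 11" (ldb) error. Be careful!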