cintel-mkllapackecblas

Proper way to calculate `trans(a)*inv(b)*a` with Intel MKL


I am using Intel's MKL LAPACKE and CBLAS to calculate

yn = trans(a)*inv(zt)*a + trans(b)*inv(zl)*b

Where a and b are m-by-n real matrices, zt and zl are m-by-m complex matrices. The resulting complex matrix yn is n-by-n.

Here is how I am doing it:

zt <- inv(zt)
zl <- inv(zl)
c <- zt*a
yn <- trans(a)*c
c <- zl*b
yn <- trans(b)*c + yn

The actual code:

#include <math.h>
#include <complex.h>
#include <stdlib.h>
#include <mkl_types.h>
#define MKL_Complex16 _Complex double //overwrite type
#include <mkl.h>
#include <mkl_lapacke.h>

int calc_yn(
    _Complex double* yn, double* a, double *b, _Complex double* zl,
    _Complex double* zt, int m, int n)
{
    lapack_int* ipiv = (MKL_INT*) malloc(sizeof(lapack_int)*m);
    LAPACKE_zgetrf(LAPACK_ROW_MAJOR, m, m, zt, m, ipiv);
    LAPACKE_zgetri(LAPACK_ROW_MAJOR, m, zt, m, ipiv);
    LAPACKE_zgetrf(LAPACK_ROW_MAJOR, m, m, zl, m, ipiv);
    LAPACKE_zgetri(LAPACK_ROW_MAJOR, m, zl, m, ipiv);
    free(ipiv);
    const double alpha = 1.0;
    const double beta = 0.0;
    lapack_complex_double* c = (lapack_complex_double*) malloc(
        sizeof(lapack_complex_double)*(m*n));
    // c <- zt*a
    cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                m, n, m,
                &alpha, zt, m, a, n,
                &beta, c, n);
    // yn <- aT*c
    cblas_zgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                n, n, m,
                &alpha, a, n, c, n,
                &beta, yn, n);
    // c <- zl*b
    cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                m, n, m,
                &alpha, zl, m, b, n,
                &beta, c, n);
    // yn <- bT*c + yn
    cblas_zgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                n, n, m,
                &alpha, b, n, c, n,
                &alpha, yn, n);
    free(c);
    return 0;
}

int main()
{
    int m = 2;
    int n = 3;
    _Complex double* yn = (_Complex double*) malloc(sizeof(_Complex double*)*(n*n));
    double a[] = {
        0.5, 0.0, 0.5,
        0.5, 0.5, 0.0
    };
    double b[] = {
        1.0, 0.0, -1.0,
        1.0, -1.0, 0.0
    };
    _Complex double zt[] = {
        (0.004 + 0.09*I), (-0.004 - 0.12*I),
        (-0.004 - 0.12*I), (0.005 + 0.11*I)
    };
    _Complex double zl[] = {
        (0.1 + 2.13*I), (-124.004 - 800.12*I),
        (-124.004 - 800.12*I), (0.4 + 4.08*I)
    };
    calc_yn(yn, a, b, zl, zt, m, n);
    free(yn);
    return 0;
}
// compile command (MKLROOT is defined by a bash script that is shipped together with intel's MKL):
//gcc -std=c11 -DMKL_ILP64 -m64 -g -o test.a test.c -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl

When I run that test it gives an error

*** Error in `./test.a': free(): invalid next size (fast): 0x0000000001f72010 ***
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f8dee29c7e5]
/lib/x86_64-linux-gnu/libc.so.6(+0x8037a)[0x7f8dee2a537a]
/lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f8dee2a953c]
./test.a[0x400d49]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f8dee245830]
./test.a[0x4007b9]
======= Memory map: ========
00400000-00401000 r-xp 00000000 08:06 2883692                            /home/pedro/codigos/HP_HEM/test.a
00601000-00602000 r--p 00001000 08:06 2883692                            /home/pedro/codigos/HP_HEM/test.a
00602000-00603000 rw-p 00002000 08:06 2883692                            /home/pedro/codigos/HP_HEM/test.a
01f72000-01f93000 rw-p 00000000 00:00 0                                  [heap]
7f8de4000000-7f8de4021000 rw-p 00000000 00:00 0 
7f8de4021000-7f8de8000000 ---p 00000000 00:00 0 
7f8de9d2f000-7f8dea73e000 rw-p 00000000 00:00 0 
7f8dea73e000-7f8deddf9000 r-xp 00000000 08:06 2505573                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_avx2.so
7f8deddf9000-7f8dedff8000 ---p 036bb000 08:06 2505573                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_avx2.so
7f8dedff8000-7f8dedfff000 r--p 036ba000 08:06 2505573                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_avx2.so
7f8dedfff000-7f8dee00a000 rw-p 036c1000 08:06 2505573                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_avx2.so
7f8dee00a000-7f8dee00f000 rw-p 00000000 00:00 0 
7f8dee00f000-7f8dee025000 r-xp 00000000 08:06 1184923                    /lib/x86_64-linux-gnu/libgcc_s.so.1
7f8dee025000-7f8dee224000 ---p 00016000 08:06 1184923                    /lib/x86_64-linux-gnu/libgcc_s.so.1
7f8dee224000-7f8dee225000 rw-p 00015000 08:06 1184923                    /lib/x86_64-linux-gnu/libgcc_s.so.1
7f8dee225000-7f8dee3e5000 r-xp 00000000 08:06 1184885                    /lib/x86_64-linux-gnu/libc-2.23.so
7f8dee3e5000-7f8dee5e5000 ---p 001c0000 08:06 1184885                    /lib/x86_64-linux-gnu/libc-2.23.so
7f8dee5e5000-7f8dee5e9000 r--p 001c0000 08:06 1184885                    /lib/x86_64-linux-gnu/libc-2.23.so
7f8dee5e9000-7f8dee5eb000 rw-p 001c4000 08:06 1184885                    /lib/x86_64-linux-gnu/libc-2.23.so
7f8dee5eb000-7f8dee5ef000 rw-p 00000000 00:00 0 
7f8dee5ef000-7f8dee5f2000 r-xp 00000000 08:06 1184909                    /lib/x86_64-linux-gnu/libdl-2.23.so
7f8dee5f2000-7f8dee7f1000 ---p 00003000 08:06 1184909                    /lib/x86_64-linux-gnu/libdl-2.23.so
7f8dee7f1000-7f8dee7f2000 r--p 00002000 08:06 1184909                    /lib/x86_64-linux-gnu/libdl-2.23.so
7f8dee7f2000-7f8dee7f3000 rw-p 00003000 08:06 1184909                    /lib/x86_64-linux-gnu/libdl-2.23.so
7f8dee7f3000-7f8dee8fb000 r-xp 00000000 08:06 1184955                    /lib/x86_64-linux-gnu/libm-2.23.so
7f8dee8fb000-7f8deeafa000 ---p 00108000 08:06 1184955                    /lib/x86_64-linux-gnu/libm-2.23.so
7f8deeafa000-7f8deeafb000 r--p 00107000 08:06 1184955                    /lib/x86_64-linux-gnu/libm-2.23.so
7f8deeafb000-7f8deeafc000 rw-p 00108000 08:06 1184955                    /lib/x86_64-linux-gnu/libm-2.23.so
7f8deeafc000-7f8deeb14000 r-xp 00000000 08:06 1185031                    /lib/x86_64-linux-gnu/libpthread-2.23.so
7f8deeb14000-7f8deed13000 ---p 00018000 08:06 1185031                    /lib/x86_64-linux-gnu/libpthread-2.23.so
7f8deed13000-7f8deed14000 r--p 00017000 08:06 1185031                    /lib/x86_64-linux-gnu/libpthread-2.23.so
7f8deed14000-7f8deed15000 rw-p 00018000 08:06 1185031                    /lib/x86_64-linux-gnu/libpthread-2.23.so
7f8deed15000-7f8deed19000 rw-p 00000000 00:00 0 
7f8deed19000-7f8deeeb6000 r-xp 00000000 08:06 2505566                    /opt/intel/compilers_and_libraries_2018.2.199/linux/compiler/lib/intel64_lin/libiomp5.so
7f8deeeb6000-7f8def0b6000 ---p 0019d000 08:06 2505566                    /opt/intel/compilers_and_libraries_2018.2.199/linux/compiler/lib/intel64_lin/libiomp5.so
7f8def0b6000-7f8def0b8000 r--p 0019d000 08:06 2505566                    /opt/intel/compilers_and_libraries_2018.2.199/linux/compiler/lib/intel64_lin/libiomp5.so
7f8def0b8000-7f8def0c2000 rw-p 0019f000 08:06 2505566                    /opt/intel/compilers_and_libraries_2018.2.199/linux/compiler/lib/intel64_lin/libiomp5.so
7f8def0c2000-7f8def0f1000 rw-p 00000000 00:00 0 
7f8def0f1000-7f8df2eb3000 r-xp 00000000 08:06 2505576                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_core.so
7f8df2eb3000-7f8df30b2000 ---p 03dc2000 08:06 2505576                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_core.so
7f8df30b2000-7f8df30b9000 r--p 03dc1000 08:06 2505576                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_core.so
7f8df30b9000-7f8df30e6000 rw-p 03dc8000 08:06 2505576                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_core.so
7f8df30e6000-7f8df30fa000 rw-p 00000000 00:00 0 
7f8df30fa000-7f8df4fd7000 r-xp 00000000 08:06 2505580                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_thread.so
7f8df4fd7000-7f8df51d6000 ---p 01edd000 08:06 2505580                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_thread.so
7f8df51d6000-7f8df51da000 r--p 01edc000 08:06 2505580                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_thread.so
7f8df51da000-7f8df543e000 rw-p 01ee0000 08:06 2505580                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_thread.so
7f8df543e000-7f8df5446000 rw-p 00000000 00:00 0 
7f8df5446000-7f8df5c5b000 r-xp 00000000 08:06 2505578                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_ilp64.so
7f8df5c5b000-7f8df5e5a000 ---p 00815000 08:06 2505578                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_ilp64.so
7f8df5e5a000-7f8df5e5c000 r--p 00814000 08:06 2505578                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_ilp64.so
7f8df5e5c000-7f8df5e6e000 rw-p 00816000 08:06 2505578                    /opt/intel/compilers_and_libraries_2018.2.199/linux/mkl/lib/intel64_lin/libmkl_intel_ilp64.so
7f8df5e6e000-7f8df5e74000 rw-p 00000000 00:00 0 
7f8df5e74000-7f8df5e9a000 r-xp 00000000 08:06 1184857                    /lib/x86_64-linux-gnu/ld-2.23.so
7f8df6028000-7f8df607d000 rw-p 00000000 00:00 0 
7f8df6096000-7f8df6099000 rw-p 00000000 00:00 0 
7f8df6099000-7f8df609a000 r--p 00025000 08:06 1184857                    /lib/x86_64-linux-gnu/ld-2.23.so
7f8df609a000-7f8df609b000 rw-p 00026000 08:06 1184857                    /lib/x86_64-linux-gnu/ld-2.23.so
7f8df609b000-7f8df609c000 rw-p 00000000 00:00 0 
7ffc998d2000-7ffc998f4000 rw-p 00000000 00:00 0                          [stack]
7ffc99987000-7ffc9998a000 r--p 00000000 00:00 0                          [vvar]
7ffc9998a000-7ffc9998c000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
Aborted (core dumped)

It does not show if I comment the 2nd and 4th call to zgemm. From what I've read in the docs, the parameters are correct. I tried changing them, but the error persists or I get an error of incorrect parameter like:

Intel MKL ERROR: Parameter 9 was incorrect on entry to cblas_zgemm.

So, what is wrong in my code?


Solution

  • The variable yn is declared as a pointer to _Complex double, i.e. it can be seen as an array of _Complex double elements. But you allocate memory for n*n elements of type _Complex double*.

    If the size of _Complex double is larger than the size of _Complex double* (highly likely, probably twice (or even four times) the size) then you allocate to little memory and will go out of bounds when using the array.

    Going out of bounds leads to undefined behavior.