I'm trying to preform matrix multiplication using Blac's pdgemm. The exact subroutine for the matrix multiplication that I am using can be found here: http://www.netlib.org/scalapack/html/pblas_qref.html#PvGEMM
However my code returns "cannot allocate memory for thread-local data: ABORT" on the pdgemm call. In my code I'm multiplying a matrix by itself and it is a square matrix, so the resulting matrix is the same dimensions. Here is the code in question
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
#include <math.h>
#define gridSize 10
int main(int argc, char* argv[]) {
int i,j,k, np, myid;
int bufsize;
double *buf; /* Buffer for reading */
MPI_Offset filesize;
MPI_File myfile; /* Shared file */
MPI_Status status; /* Status returned from read */
/* Initialize MPI */
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &np);
double *A = (double*) malloc(gridSize*gridSize*sizeof(double));
/*MPI-IO Code removed for clarity including buf and bufsize assignments
.
.
.
.
*/
//I use mpi-IO to read in a matrix from a file, each processor reads in a row and that row is store in the array called buf
for (j = 0; j <bufsize;j++){
A[myid*bufsize+j] = buf[j];
}
//BLACS portion
int ictxt, nprow, npcol, myrow, mycol, nb;
int info,itemp;
int ZERO = 0, ONE = 1;
nprow = 2; npcol = 2; nb = 2;
Cblacs_pinfo(&myid,&np);
Cblacs_get(-1,0,&ictxt);
Cblacs_gridinit(&ictxt,"Row",nprow,npcol);
Cblacs_gridinfo(ictxt,&nprow,&npcol,&myrow,&mycol);
int M = gridSize;
int descA[9], descx[9], descy[9];
int mA = numroc_(&M, &nb, &myrow, &ZERO, &nprow);
int nA = numroc_(&M, &nb, &mycol, &ZERO, &npcol);
int nx = numroc_(&M, &nb, &myrow, &ZERO, &nprow);
int my = numroc_(&M ,&nb, &myrow, &ZERO, &nprow);
descinit_(descA,&M,&M,&nb,&nb,&ZERO,&ZERO,&ictxt,&mA,&info);
descinit_(descx,&M,&M,&nb,&nb,&ZERO,&ZERO,&ictxt,&nx,&info);
descinit_(descy,&M,&M,&nb,&nb,&ZERO,&ZERO,&ictxt,&my,&info);
double* matrixA = (double*)malloc(mA*nA*sizeof(double));
double* matrixB = (double*)malloc(mA*nA*sizeof(double));
double* matrixC = (double*)calloc(mA*nA,sizeof(double));
int sat,sut;
for(i=0;i<mA;i++){
for(j=0;j<nA;j++){
sat = (myrow*nb)+i+(i/nb)*nb;
sut = (mycol*nb)+j+(j/nb)*nb;
matrixA[j*mA+i] = A[sat*M+sut];
matrixB[j*mA+i] = A[sat*M+sut];
}
}
double alpha = 1.0; double beta = 0.0;
//call where seg fault happens
pdgemm_("N","N",&M,&M,&M,&alpha,matrixA,&ONE,&ONE,descA,matrixB,&ONE,&ONE,descx,
&beta,matrixC,&ONE,&ONE,descy);
Cblacs_barrier(ictxt,"A");
Cblacs_gridexit(0);
/* Close the file */
MPI_File_close(&myfile);
if (myid==0) {
printf("Done\n");
}
MPI_Finalize();
exit(0);
}
I dont have much experience with ScaLapacs, but from the examples I have looked over I am not sure why I am getting the segmentation fault, any help would be appreciated.
I got it right by changing the "int" into "long" and "double" into "long double"
another method I tried that works is to link the libraries statically.
mpicc -w -o a a.c -L$MKLPATH -I$IMKLPATH -Wl,--start-group $MKLPATH/libmkl_scalapack.a $MKLPATH/libmkl_blacs_openmpi_lp64.a $MKLPATH/libmkl_intel_lp64.a $MKLPATH/libmkl_intel_thread.a $MKLPATH/libmkl_core.a -static_mpi -Wl,--end-group -lpthread -lm -openmp