csegmentation-faultfortranmpiscalapack

C/Fortran hybrid code using MPI/ScaLAPACK seg faults in a weird way


I am trying to translate this CODE from Fortran into C.

This is what I have so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include "mpi.h"

#define PRINT_NOTHING             0
#define PRINT_UP_TO_MESGS         1
#define PRINT_UP_TO_ARRAYS        2
#define PRINT_UP_TO_MATRICES      3
#define PRINT_LEVEL               PRINT_UP_TO_MATRICES

/*! Parameters: */
#define TOTMEM                    425053208
#define INTMEM                    13107200
#define NTESTS                    20
#define DLEN_                     9
#define DBLESZ                    8               /*! Size of a DOUBLE PRE. */
#define MEMSIZ                    425053208/8     /*! Memory for DOUBLE PRE. */

#define MASTER                    0
#define MPI_ERROR_CODE_ERROR_GRID 1
#define MPI_ERROR_CODE_NOT_FACT   2
#define MPI_ERROR_CODE_NOT_SOLVE  3

#define BLACS_USER_WONT_FIN_MPI   0
#define BLACS_USER_WILL_FIN_MPI   1

#define DESCRIPTOR_SIZE           7

typedef enum {
  FALSE,
  TRUE
  
} logical;

/*! External subroutines: */
extern void Cblacs_barrier  (int context,
                             char* scope);
extern void Cblacs_exit     (int doneflag);
extern void Cblacs_get      (int ,
                             int ,
                             int *context);
extern void Cblacs_gridexit (int context);
extern void Cblacs_gridinfo (int context,
                             int *our_nprow,
                             int *our_npcol,
                             int *my_prow,
                             int *my_pcol);
extern void Cblacs_gridinit (int* context,
                             char* order,
                             int nproc_rows,
                             int nproc_cols);
extern void Cblacs_pinfo    (int *my_blacs_pid,
                             int *our_blacs_nprocs);

/* http://www.netlib.org/scalapack/explore-html/dd/d22/descinit_8f.html */
extern void descinit_       (int *desc,
                             int *m,
                             int *n,
                             int *mb,
                             int *nb,
                             int *irsrc,
                             int *icsrc,
                             int *ictxt,
                             int *lld,
                             int *info);
/* http://www.netlib.org/scalapack/explore-html/dc/d44/igsum2d___8c.html */
extern void igsum2d_        (int *contxt,
                             char *scope,
                             char *top,
                             int *m,
                             int *n,
                             int *a,
                             int *lda,
                             int *rdest,
                             int *cdest);
/* http://www.netlib.org/scalapack/explore-html/db/da1/pdbmatgen_8f.html */
extern void pdbmatgen_      (int *ictxt,
                             char *aform,
                             char *aform2,
                             int *bwl,
                             int *bwu,
                             int *n,
                             int *mb,
                             int *nb,
                             double *a,
                             int *lda,
                             int *iarow,
                             int *iacol,
                             int *iseed,
                             int *myrow,
                             int *mycol,
                             int *nprow,
                             int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d8/dba/pdchekpad_8f.html */
extern void pdchekpad_      (int * ictxt,
                             char *mess,
                             int *m,
                             int *n,
                             double *a,
                             int *lda,
                             int *ipre,
                             int *ipost,
                             double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d2/dc2/pddblaschk_8f.html */
extern void pddblaschk_     (char *symm,
                             char *uplo,
                             char *trans,
                             int *n,
                             int *bwl,
                             int *bwu,
                             int *nrhs,
                             double *x,
                             int *ix,
                             int *jx,
                             int *descx,
                             int *iaseed,
                             double *a,
                             int *ia,
                             int *ja,
                             int *desca,
                             int *ibseed,
                             double *anorm,
                             double *resid,
                             double *work,
                             int *worksiz);
/* http://www.netlib.org/scalapack/explore-html/d6/d3b/pdfillpad_8f.html */
extern void pdfillpad_      (int *ictxt,
                             int *m,
                             int *n,
                             double *a,
                             int *lda,
                             int *ipre,
                             int *ipost,
                             double *chkval);
/* http://www.netlib.org/scalapack/explore-html/d0/d2b/pdgbinfo_8f.html */
extern void pdgbinfo_       (char *summry,
                             int *nout,
                             char *trans,
                             int *nmat,
                             int *nval,
                             int *ldnval,
                             int *nbw,
                             int *bwlval,
                             int *bwuval,
                             int *ldbwval,
                             int *nnb,
                             int *nbval,
                             int *ldnbval,
                             int *nnr,
                             int *nrval,
                             int *ldnrval,
                             int *nnbr,
                             int *nbrval,
                             int *ldnbrval,
                             int *ngrids,
                             int *pval,
                             int *ldpval,
                             int *qval,
                             int *ldqval,
                             float *thresh,
                            
                             double *work,  /* Array to gather and bcast. */
                            
                             int *iam,
                             int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/dd/dad/pdgbtrf_8f.html */
extern void pdgbtrf_        (int *n,
                             int *bwl,
                             int *bwu,
                             double *a,
                             int *ja,
                             int *desca,
                             int *ipiv,
                             double *af,
                             int *laf,
                             double *work,
                             int *lwork,
                             int *info);
/* http://www.netlib.org/scalapack/explore-html/d9/dda/pdgbtrs_8f.html */
extern void pdgbtrs_        (char *trans,
                             int *n,
                             int *bwl,
                             int *bwu,
                             int *nrhs,
                             double *a,
                             int *ja,
                             int *desca,
                             int *ipiv,
                             double *b,
                             int *ib,
                             int *descb,
                             double *af,
                             int *laf,
                             double *work,
                             int *lwork,
                             int *info);
/*
http://www.netlib.org/scalapack/explore-html/d2/d66/_e_i_g_2pdmatgen_8f.html
*/
extern void pdmatgen_       (int *ictxt,
                             char *aform,
                             char *diag,
                             int *m,
                             int *n,
                             int *mb,
                             int *nb,
                             double *a,
                             int *lda,
                             int *iarow,
                             int *iacol,
                             int *iseed,
                             int *iroff,
                             int *irnum,
                             int *icoff,
                             int *icnum,
                             int *myrow,
                             int *mycol,
                             int *nprow,
                             int *npcol);
/* http://www.netlib.org/scalapack/explore-html/d2/dcd/sltimer_8f_source.html */
extern void slboot_         (void);
extern void slcombine_      (int *ictxt,
                             char *scope,
                             char *op,
                             char *timetype,
                             int *n,
                             int *ibeg,
                             double *times);
extern void sltimer_        ( int *i );

/*! External functions: */
/* http://www.netlib.org/scalapack/explore-html/d4/d48/numroc_8f.html */
extern int numroc_      (int *n,
                         int *nb,
                         int *iproc,
                         int *isrcproc,
                         int *nprocs);
/* http://www.netlib.org/scalapack/explore-html/df/dee/tools_8f.html#a67ae4efe5110e3297b1e9e3a46d8c78b */
extern logical lsame_   (char *ca,
                         char *cb);
/* http://www.netlib.org/scalapack/explore-html/db/dd0/pdlange_8f.html */
extern double pdlange_  (char *norm,
                         int *m,
                         int *n,
                         double *a,
                         int *ia,
                         int *ja,
                         int *desca,
                         double *work );

/*! Intrinsic functions: */
/*       INTRINSIC          DBLE, MAX, MIN, MOD */
inline double DBLE(int xx) { return (double) xx; }
inline double MAX(double xx, double yy) { return xx >= yy? xx: yy; }
inline double MIN(double xx, double yy) { return xx <= yy? xx: yy; }
inline int MOD(int xx, int yy) { return xx%yy; }

/*! PROGRAM PDGBDRIVER: */
int main (int argc, char **argv) {

  /*! Parameters: */
  int ntests = NTESTS;  /* Number of num. tests to be performed */
  //   int       BLOCK_CYCLIC_2D = 1;
  //   int       DTYPE_ = 1;
  //   int       CTXT_ = 2;
  //   int       M_ = 3;
  //   int       N_ = 4;
  //   int       MB_ = 5;
  //   int       NB_ = 6;
  //   int       RSRC_ = 7;
  //   int       CSRC_ = 8;
  //   int       LLD_ = 9;
  //
  //   double    ZERO = 0.0;
  //   double    PADVAL = -9923.0;
  //
  //   int       INT_ONE = 1;
  
  /*! Local scalars: */
  logical   CHECK;        /* Should or shouldn't I perform the num. tests? */
  char      TRANS;        /* Should I transpose the matrix? */
  //   char      PASSED[6];
  char      OUTFILE[80];  /* ? */
  //   int       BWL;
  //   int       BWU;
  //   int       BW_NUM;
  //   int       FILLIN_SIZE;
  //   int       FREE_PTR;
  //   int       H;
  //   int       HH;
  int       I;          /* Iterator per process grid. */
  int       IAM;          /* My MPI process ID. */
  int       IASEED;       /* Seed for random matrix A creation. */
  int       IBSEED;       /* Seed for random matrix B creation. */
  //   int       ICTXT;
  //   int       ICTXTB;
  //   int       IERR_TEMP;
  //   int       IMIDPAD;
  //   int       INFO;
  //   int       IPA;
  //   int       IPB;
  //   int       IPOSTPAD;
  //   int       IPREPAD;
  //   int       IPW;
  //   int       IPW_SIZE;
  //   int       IPW_SOLVE;
  //   int       IPW_SOLVE_SIZE;
  //   int       IP_DRIVER_W;
  //   int       IP_FILLIN;
  //   int       J;
  //   int       K;
  
  /*! Data statements: */
  //   int       KFAIL = 4;
  //   int       KPASS = 4;
  //   int       KSKIP = 4;
  //   int       KTESTS = 4;
  
  /*! Local scalars: (continued) */
  //   int       MYCOL;
  //   int       MYRHS_SIZE;
  //   int       MYROW;
  //   int       N;
  //   int       NB;
  int       NBW;    /* Number of bandwith values per matrix. */
  int       NGRIDS; /* Number of process grids to try for. */
  int       NMAT;   /* Number of matrices  */
  int       NNB;    /* Number of sizes of NB (block column size). */
  int       NNBR;
  int       NNR;
  int       NOUT;   /* Device out... o.O */
  //   int       NP;
  int       NPCOL;  /* Procs per row in processors grid/ */
  int       NPROCS; /* The size of OUR communicator. */
  //   int       NPROCS_REAL;
  int       NPROW;  /* Procs per row in processors grid/ */
  //   int       NQ;
  //   int       NRHS;
  //   int       N_FIRST;
  //   int       N_LAST;
  //   int       WORKSIZ;
  
  float     THRESH; /* Threshold variable for numerical tests. */
  
  //   double    ANORM;
  //   double    NOPS;
  //   double    NOPS2;
  //   double    SRESID;
  //   double    TMFLOPS;
  //   double    TMFLOPS2;
  
  /*! Local arrays: */
  //   int       IPIV[INTMEM];
  int       BWLVAL[NTESTS];  /* Values of lower diagonals per matrix. */
  int       BWUVAL[NTESTS];  /* Values of upper diagonals per matrix. */
  //   int       DESCA[7];
  //   int       DESCA2D[DLEN_];
  //   int       DESCB[7];
  //   int       DESCB2D[DLEN_];
  //   int       IERR[1];
  int       NBRVAL[NTESTS]; /* ? */
  int       NBVAL[NTESTS];  /* Column sizes per matrix. */
  int       NRVAL[NTESTS];  /* ? */
  int       NVAL[NTESTS];   /* Values of N for a given matrix. */
  int       PVAL[NTESTS];   /* Values for proc. rows. */
  int       QVAL[NTESTS];   /* Values for proc. cols. */
  
  //   double    CTIME[2];
  double    *MEM;      /* GLOABL array for broadcasting the information. */
  //   double    WTIME[2];

  /*! Executable statements: */

  /*! Get starting information: */
  Cblacs_pinfo(&IAM, &NPROCS);
  IASEED = 100;
  IBSEED = 200;

  MEM = (double *) malloc(MEMSIZ);
  
  pdgbinfo_(OUTFILE, &NOUT, &TRANS, &NMAT, NVAL, &ntests, &NBW,
            BWLVAL, BWUVAL, &ntests, &NNB, NBVAL, &ntests, &NNR,
            NRVAL, &ntests, &NNBR, NBRVAL, &ntests, &NGRIDS, PVAL,
            &ntests, QVAL, &ntests, &THRESH, MEM, &IAM, &NPROCS);

  CHECK = (THRESH >= 0.0f);

  /*! Print headings: */
  fprintf(stdout, "\n");
  fprintf(stdout, "TIME TR      N  BWL BWU    NB  NRHS    P    Q L*U Time Slv Time   MFLOPS   MFLOP2  CHECK\n");
  fprintf(stdout, "---- -- ------  --- ---  ---- ----- ---- ---- -------- -------- -------- -------- ------\n");
  fprintf(stdout, "\n");
  fflush(stdout);

  /*! Loop over different process grids: */
  for (I = 1; I <= NGRIDS; I++) {

    NPROW = PVAL[I - 1];
    NPCOL = QVAL[I - 1];

    fprintf(stdout, "Solving for a %d x %d grid!\n", NPROW, NPCOL);
  }

  free(MEM);
  Cblacs_exit(0);
  exit(EXIT_SUCCESS);
}

Please excuse the amount of commented-out variables, but like I said, I am basically translating the reference driver, from scratch.

My problem is that it suddenly started seg-faulting, as soon as I wrote the for loop... it was working before. That is a clear sign of a funky memory manipulation issue, which probably arises from issues related to the memory management differences between C and Fortran.

At least, that is my guess.

Does anybody has a clue on what may be going wrong?


Solution

  • From what I read from your code, you are never actually checking the value of NGRIDS. my guess is that it's too large and the array indices go beyond limits.