c++arraysmaxmpipoint-to-point

finding global maxima of a function from comparing each processor's local maxima using MPI ring topology


I wish to use the MPI ring topology, passing each processor's maxima around the ring, comparing the local maxima and then output the global maxima for all processors. I am using a 10 dimensional Monte Carlo integration function. My first idea was to make an array with each processor's local maxima, then pass that value, compare and output the highest value. But I couldn't elegantly code to make an array which will take only each processors' max value and store it corresponding to rank of the processor, this way I can also keep track which processor got the global maxima.

I didn't finish my code yet, right now I am interested to see if an array with local maxima from processor's can be created. the way I coded, it's very time consuming and if there is a lot of processors, then I have to declare them each time, and yet I couldn't produce the array I am looking for. I am sharing the code here:

#include <iostream>
#include <fstream>
#include <iomanip>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <mpi.h>
using namespace std;


//define multivariate function F(x1, x2, ...xk)            

double f(double x[], int n)
{
    double y;
    int j;
    y = 0.0;

    for (j = 0; j < n-1; j = j+1)
      {
         y = y + exp(-pow((1-x[j]),2)-100*(pow((x[j+1] - pow(x[j],2)),2)));

      }     

    y = y;
    return y;
}

//define function for Monte Carlo Multidimensional integration

double int_mcnd(double(*fn)(double[],int),double a[], double b[], int n, int m)

{
    double r, x[n], v;
    int i, j;
    r = 0.0;
    v = 1.0;
    // initial seed value (use system time) 
    //srand(time(NULL)); 


    // step 1: calculate the common factor V
    for (j = 0; j < n; j = j+1)
      {
         v = v*(b[j]-a[j]);
      } 

    // step 2: integration
    for (i = 1; i <= m; i=i+1)
    {
        // calculate random x[] points
        for (j = 0; j < n; j = j+1)
        {
            x[j] = a[j] +  (rand()) /( (RAND_MAX/(b[j]-a[j])));
        }         
        r = r + fn(x,n);
    }
    r = r*v/m;

    return r;
}




double f(double[], int);
double int_mcnd(double(*)(double[],int), double[], double[], int, int); 



int main(int argc, char **argv)
{    

    int rank, size;

    MPI_Init (&argc, &argv);      // initializes MPI
    MPI_Comm_rank (MPI_COMM_WORLD, &rank); // get current MPI-process ID. O, 1, ...
    MPI_Comm_size (MPI_COMM_WORLD, &size); // get the total number of processes


    /* define how many integrals */
    const int n = 10;       

    double b[n] = {5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,5.0};                    
    double a[n] = {-5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0,-5.0};  

    double result, mean;
    int m;

    const unsigned int N = 5;
    double max = -1;
    double max_store[4];

    cout.precision(6);
    cout.setf(ios::fixed | ios::showpoint); 


    srand(time(NULL) * rank);  // each MPI process gets a unique seed

    m = 4;                // initial number of intervals

    // convert command-line input to N = number of points
    //N = atoi( argv[1] );


    for (unsigned int  i=0; i <=N; i++)
    {
        result = int_mcnd(f, a, b, n, m);
        mean = result/(pow(10,10));

        if( mean > max) 
        {
         max = mean;
        }

        //cout << setw(10)  << m << setw(10) << max << setw(10) << mean << setw(10) << rank << setw(10) << size <<endl;
        m = m*4; 
    }

    //cout << setw(30)  << m << setw(30) << result << setw(30) << mean <<endl; 
    printf("Process %d of %d mean = %1.5e\n and local max = %1.5e\n", rank, size, mean, max );
    if (rank==0)
        {
         max_store[0] = max;
        }
        else if (rank==1)
        {
         max_store[1] = max;
        }
        else if (rank ==2)
        {
         max_store[2] = max;
        }
        else if (rank ==3)
        {
         max_store[3] = max;
        }
    for( int k = 0; k < 4; k++ )
    {
     printf( "%1.5e\n", max_store[k]);
    }


    //double max_store[4] = {4.43095e-02, 5.76586e-02, 3.15962e-02, 4.23079e-02}; 

    double send_junk = max_store[0];
    double rec_junk;
    MPI_Status status;


  // This next if-statment implemeents the ring topology
  // the last process ID is size-1, so the ring topology is: 0->1, 1->2, ... size-1->0
  // rank 0 starts the chain of events by passing to rank 1
  if(rank==0) {
    // only the process with rank ID = 0 will be in this block of code.
    MPI_Send(&send_junk, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD); //  send data to process 1
    MPI_Recv(&rec_junk, 1, MPI_DOUBLE, size-1, 0, MPI_COMM_WORLD, &status); // receive data from process size-1
  }
  else if( rank == size-1) { 
    MPI_Recv(&rec_junk, 1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, &status); // recieve data from process rank-1 (it "left" neighbor")
    MPI_Send(&send_junk, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); // send data to its "right neighbor", rank 0
  }
  else {
    MPI_Recv(&rec_junk, 1, MPI_DOUBLE, rank-1, 0, MPI_COMM_WORLD, &status); // recieve data from process rank-1 (it "left" neighbor")
    MPI_Send(&send_junk, 1, MPI_DOUBLE, rank+1, 0, MPI_COMM_WORLD); // send data to its "right neighbor" (rank+1)
  }
  printf("Process %d send %1.5e\n and recieved %1.5e\n", rank, send_junk, rec_junk ); 


  MPI_Finalize(); // programs should always perform a "graceful" shutdown
    return 0;
}

compile with :

mpiCC -o gd test_code.cpp
 mpirun -np 4 ./gd

I would appreciate suggestion:

  1. if there is a more elegant way to make local maxima arrays?
  2. How to compare the local maxima and decide the global maxima while passing the values in a ring?

Also feel free to modify the code to provide me a better example to work with. I would appreciate any suggestion. thanks.


Solution

  • For this sort of thing, better using either MPI_Reduce() or MPI_Allreduce() with MPI_MAX as operator. The former will compute the max over the values exposed by all processes and give the result to the "root" process only, while the later will do the same, but give the results to all processes.

    // Only process of rank 0 get the global max
    MPI_Reduce( &local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
    // All processes get the global max
    MPI_Allreduce( &local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
    // All processes get the global max, stored in place of the local max
    // after the call ends - this might be the most interesting one for you
    MPI_Allreduce( MPI_IN_PLACE, &max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
    

    As you can see, you could just insert the 3rd example into your code to solve your problem.

    BTW, unrelated remark, but this hurts my eyes:

    if (rank==0)
        {
         max_store[0] = max;
        }
        else if (rank==1)
        {
         max_store[1] = max;
        }
        else if (rank ==2)
        {
         max_store[2] = max;
        }
        else if (rank ==3)
        {
         max_store[3] = max;
        }
    

    What about something like this:

    if ( rank < 4 && rank >= 0 ) {
        max_store[rank] = max;
    }