copenmpreduction

openMP reduction on struct


I want to find the total sum, minimum and maximum (and their positions) in a matrix using openMP, and more specifically, the reduction clause.

The problem I'm having is that I can't apply reduction operations min or max to a structure (in my case, struct Extreme), and when I tried to use the declare reduction(...) clause to create my own operation, I couldn't get it to work either (I think because reduction won't perform on structs).

In order to solve the problem temporarily, I've applied a critical section to the code in order to safely read update the values of the structs minimum and maximum, but this makes the concurrent execution be very slow for large sizes of the matrix.

My question is then: what can I do in order to improve this solution? Is there a way to actually use reduction with a struct like I want to? Should I tackle the problem differently and make use of another openMP clause?

Here's my code (most variables are destined to report statistics of the executions):

/* Matrix summation using OpenMP
usage with gcc (version 4.2 or higher required):
gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
./matrixSum-openmp size numWorkers
*/

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#define MAXSIZE 10000           // Maximum matrix size
#define MATRIX_LIMIT 100        // Matrix elements range [0, MATRIX_LIMIT - 1]
#define MAXWORKERS 4            // Maximum number of workers
#define PROGRAM_EXECUTIONS 5    // Number of program executions

int numWorkers, size;
int matrix[MAXSIZE][MAXSIZE];
double start_time, end_time;

struct Extreme {
    int value;
    int pos_i, pos_j;
};

struct Extreme min (struct Extreme e1, struct Extreme e2) {
    return e1.value < e2.value ? e1 : e2;
}

struct Extreme max(struct Extreme e1, struct Extreme e2) {
    return e1.value > e2.value ? e1 : e2;
}

void readCommandLine(int argc, char *argv[]) {
    size = (argc > 1) ? atoi(argv[1]) : MAXSIZE;
    numWorkers = (argc > 2) ? atoi(argv[2]) : MAXWORKERS;
    if (size > MAXSIZE)
        size = MAXSIZE;
    if (numWorkers > MAXWORKERS)
        numWorkers = MAXWORKERS;
}

void initializeMatrix() {
    for (int i = 0; i < size; i++) {
        //printf("[ ");
        for (int j = 0; j < size; j++) {
            matrix[i][j] = rand() % MATRIX_LIMIT;
            //printf(" %d", matrix[i][j]);
        }
        //printf(" ]\n");
    }
}

double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
    double avg = 0;
    for(int i = 0; i < PROGRAM_EXECUTIONS; i++)
        avg += v[i];
    return avg / PROGRAM_EXECUTIONS;
}

// Read command line, initialize, and create threads
int main(int argc, char *argv[]) {

    // Read command line args if any.
    readCommandLine(argc, argv);

    // Initialize the matrix
    initializeMatrix();

    // Store for each number of processors used, the times it took to execute each program.
    // execution_times[i][j] = time (s) it took to execute with i processors program j.
    // Note execution_times[0][i] is the sequential time of program execution i.
    double execution_times[numWorkers][PROGRAM_EXECUTIONS];

    for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
        // Set the number of threads for openMP.
        omp_set_num_threads(num_proc + 1);

        printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
        for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {

            // Reset variables.
            int total_sum = 0, i, j;
            struct Extreme minimum, maximum;
            minimum.value = MATRIX_LIMIT;
            maximum.value = -MATRIX_LIMIT;

            printf("\nProgram execution number %d: \n", num_prog + 1);

            // Start timer
            start_time = omp_get_wtime();

            //#pragma omp declare reduction(myMin : struct Extreme : combinerMin) initializer(omp_orig = initMin)
            //#pragma omp declare reduction(myMax : struct Extreme : combinerMax) initializer(omp_priv = initMax)
            #pragma omp parallel for reduction (+:total_sum) private(j) // (myMin: minimum) (myMax: maximum)
            for (i = 0; i < size; i++) {
                for (j = 0; j < size; j++) {
                    total_sum += matrix[i][j];
                    #pragma omp critical
                    {
                        struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
                        minimum = min(minimum, candidate);
                        maximum = max(maximum, candidate);
                    }
                }
            }

            // Implicit barrier
            end_time = omp_get_wtime();

            execution_times[num_proc][num_prog] = end_time - start_time;

            printf("The total sum is %d\n", total_sum);
            printf("The minimum is %d at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
            printf("The maximum is %d at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
            printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
            printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
        }

        double seq_avg = calculateAvg(execution_times[0]);
        double exec_avg = calculateAvg(execution_times[num_proc]);
        double speedup = (seq_avg / exec_avg) * 100;

        printf("\nAverage sequential time (%d executions): %f", PROGRAM_EXECUTIONS, seq_avg);
        printf("\nAverage execution time (%d executions) with %d processor(s): %f s\n", PROGRAM_EXECUTIONS, num_proc + 1, exec_avg);
        printf("Speedup: %.2f%%", speedup);
    }

    printf("\n\n");
}

Solution

  • I solved it myself:

    It suffices to use two combiner functions with pointers carefully:

    /* Matrix summation using OpenMP
    usage with gcc (version 4.2 or higher required):
    gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
    ./matrixSum-openmp size numWorkers
    */
    
    #include <omp.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    
    #define MAXSIZE 10500                // Maximum matrix size
    #define MATRIX_LIMIT 10000000000           // Matrix elements range [0, MATRIX_LIMIT - 1]
    #define MAXWORKERS 4               // Maximum number of workers
    #define PROGRAM_EXECUTIONS 10       // Number of program executions
    
    int numWorkers, size;
    int matrix[MAXSIZE][MAXSIZE];
    double start_time, end_time;
    
    struct Extreme {
        long int value;
        int pos_i, pos_j;
    };
    
    struct Extreme min(struct Extreme e1, struct Extreme e2) {
        return e1.value < e2.value ? e1 : e2;
    }
    
    struct Extreme max(struct Extreme e1, struct Extreme e2) {
        return e1.value > e2.value ? e1 : e2;
    }
    
    void readCommandLine(int argc, char *argv[]) {
        size = (argc > 1) ? atoi(argv[1]) : MAXSIZE;
        numWorkers = (argc > 2) ? atoi(argv[2]) : MAXWORKERS;
        if (size > MAXSIZE)
            size = MAXSIZE;
        if (numWorkers > MAXWORKERS)
            numWorkers = MAXWORKERS;
    }
    
    void initializeMatrix() {
        for (int i = 0; i < size; i++) {
            //printf("[ ");
            for (int j = 0; j < size; j++) {
                matrix[i][j] = rand() % MATRIX_LIMIT;
                //printf(" %d", matrix[i][j]);
            }
            //printf(" ]\n");
        }
    }
    
    double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
        double avg = 0;
        for(int i = 0; i < PROGRAM_EXECUTIONS; i++)
            avg += v[i];
        return avg / PROGRAM_EXECUTIONS;
    }
    
    void combinerMin(struct Extreme *out, struct Extreme *in) {
        *out =  out->value < in->value ? *out : *in;
    }
    
    void combinerMax(struct Extreme *out, struct Extreme *in) {
        *out =  out->value > in->value ? *out : *in;
    }
    
    // Read command line, initialize, and create threads
    int main(int argc, char *argv[]) {
    
        srand(time(0));
    
        // Read command line args if any.
        readCommandLine(argc, argv);
    
        // Initialize the matrix
        initializeMatrix();
    
        // Store for each number of processors used, the times it took to execute each program.
        // execution_times[i][j] = time (s) it took to execute with i processors program j.
        // Note execution_times[0][i] is the sequential time of program execution i.
        double execution_times[numWorkers][PROGRAM_EXECUTIONS];
    
        for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
            // Set the number of threads for openMP.
            omp_set_num_threads(num_proc + 1);
    
            printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
            for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {
    
                // Reset variables.
                int total_sum = 0, i, j;
                struct Extreme minimum, maximum;
                minimum.value = MATRIX_LIMIT;
                maximum.value = -MATRIX_LIMIT;
    
                printf("\nProgram execution number %d: \n", num_prog + 1);
    
                // Start timer
                start_time = omp_get_wtime();
    
                #pragma omp declare reduction(myMin : struct Extreme : combinerMin(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
                #pragma omp declare reduction(myMax : struct Extreme : combinerMax(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
                #pragma omp parallel for reduction (+:total_sum) reduction (myMin: minimum) reduction (myMax: maximum) private(j)
                for (i = 0; i < size; i++) {
                    for (j = 0; j < size; j++) {
                        total_sum += matrix[i][j];
                        struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
                        minimum = min(minimum, candidate);
                        maximum = max(maximum, candidate);
                    }
                } // Implicit barrier
                end_time = omp_get_wtime();
    
                execution_times[num_proc][num_prog] = end_time - start_time;
    
                printf("The total sum is %d\n", total_sum);
                printf("The minimum is %ld at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
                printf("The maximum is %ld at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
                printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
                printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
            }
    
            double seq_avg = calculateAvg(execution_times[0]);
            double exec_avg = calculateAvg(execution_times[num_proc]);
            double speedup = (seq_avg / exec_avg) * 100;
    
            printf("\nAverage sequential time (%d executions): %f s", PROGRAM_EXECUTIONS, seq_avg);
            printf("\nAverage execution time (%d executions): %f s\n", PROGRAM_EXECUTIONS, exec_avg);
            printf("Speedup: %.2f%%", speedup);
        }
    
        printf("\n\n");
    }