I want to find the total sum, minimum and maximum (and their positions) in a matrix using openMP, and more specifically, the reduction clause.
The problem I'm having is that I can't apply reduction operations min
or max
to a structure (in my case, struct Extreme
), and when I tried to use the declare reduction(...)
clause to create my own operation, I couldn't get it to work either (I think because reduction won't perform on structs).
In order to solve the problem temporarily, I've applied a critical section to the code in order to safely read update the values of the structs minimum
and maximum
, but this makes the concurrent execution be very slow for large sizes of the matrix.
My question is then: what can I do in order to improve this solution? Is there a way to actually use reduction with a struct like I want to? Should I tackle the problem differently and make use of another openMP clause?
Here's my code (most variables are destined to report statistics of the executions):
/* Matrix summation using OpenMP
usage with gcc (version 4.2 or higher required):
gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
./matrixSum-openmp size numWorkers
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define MAXSIZE 10000 // Maximum matrix size
#define MATRIX_LIMIT 100 // Matrix elements range [0, MATRIX_LIMIT - 1]
#define MAXWORKERS 4 // Maximum number of workers
#define PROGRAM_EXECUTIONS 5 // Number of program executions
int numWorkers, size;
int matrix[MAXSIZE][MAXSIZE];
double start_time, end_time;
struct Extreme {
int value;
int pos_i, pos_j;
};
struct Extreme min (struct Extreme e1, struct Extreme e2) {
return e1.value < e2.value ? e1 : e2;
}
struct Extreme max(struct Extreme e1, struct Extreme e2) {
return e1.value > e2.value ? e1 : e2;
}
void readCommandLine(int argc, char *argv[]) {
size = (argc > 1) ? atoi(argv[1]) : MAXSIZE;
numWorkers = (argc > 2) ? atoi(argv[2]) : MAXWORKERS;
if (size > MAXSIZE)
size = MAXSIZE;
if (numWorkers > MAXWORKERS)
numWorkers = MAXWORKERS;
}
void initializeMatrix() {
for (int i = 0; i < size; i++) {
//printf("[ ");
for (int j = 0; j < size; j++) {
matrix[i][j] = rand() % MATRIX_LIMIT;
//printf(" %d", matrix[i][j]);
}
//printf(" ]\n");
}
}
double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
double avg = 0;
for(int i = 0; i < PROGRAM_EXECUTIONS; i++)
avg += v[i];
return avg / PROGRAM_EXECUTIONS;
}
// Read command line, initialize, and create threads
int main(int argc, char *argv[]) {
// Read command line args if any.
readCommandLine(argc, argv);
// Initialize the matrix
initializeMatrix();
// Store for each number of processors used, the times it took to execute each program.
// execution_times[i][j] = time (s) it took to execute with i processors program j.
// Note execution_times[0][i] is the sequential time of program execution i.
double execution_times[numWorkers][PROGRAM_EXECUTIONS];
for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
// Set the number of threads for openMP.
omp_set_num_threads(num_proc + 1);
printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {
// Reset variables.
int total_sum = 0, i, j;
struct Extreme minimum, maximum;
minimum.value = MATRIX_LIMIT;
maximum.value = -MATRIX_LIMIT;
printf("\nProgram execution number %d: \n", num_prog + 1);
// Start timer
start_time = omp_get_wtime();
//#pragma omp declare reduction(myMin : struct Extreme : combinerMin) initializer(omp_orig = initMin)
//#pragma omp declare reduction(myMax : struct Extreme : combinerMax) initializer(omp_priv = initMax)
#pragma omp parallel for reduction (+:total_sum) private(j) // (myMin: minimum) (myMax: maximum)
for (i = 0; i < size; i++) {
for (j = 0; j < size; j++) {
total_sum += matrix[i][j];
#pragma omp critical
{
struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
minimum = min(minimum, candidate);
maximum = max(maximum, candidate);
}
}
}
// Implicit barrier
end_time = omp_get_wtime();
execution_times[num_proc][num_prog] = end_time - start_time;
printf("The total sum is %d\n", total_sum);
printf("The minimum is %d at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
printf("The maximum is %d at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
}
double seq_avg = calculateAvg(execution_times[0]);
double exec_avg = calculateAvg(execution_times[num_proc]);
double speedup = (seq_avg / exec_avg) * 100;
printf("\nAverage sequential time (%d executions): %f", PROGRAM_EXECUTIONS, seq_avg);
printf("\nAverage execution time (%d executions) with %d processor(s): %f s\n", PROGRAM_EXECUTIONS, num_proc + 1, exec_avg);
printf("Speedup: %.2f%%", speedup);
}
printf("\n\n");
}
I solved it myself:
It suffices to use two combiner functions with pointers carefully:
/* Matrix summation using OpenMP
usage with gcc (version 4.2 or higher required):
gcc -O -fopenmp -o matrixSum-openmp matrixSum-openmp.c
./matrixSum-openmp size numWorkers
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MAXSIZE 10500 // Maximum matrix size
#define MATRIX_LIMIT 10000000000 // Matrix elements range [0, MATRIX_LIMIT - 1]
#define MAXWORKERS 4 // Maximum number of workers
#define PROGRAM_EXECUTIONS 10 // Number of program executions
int numWorkers, size;
int matrix[MAXSIZE][MAXSIZE];
double start_time, end_time;
struct Extreme {
long int value;
int pos_i, pos_j;
};
struct Extreme min(struct Extreme e1, struct Extreme e2) {
return e1.value < e2.value ? e1 : e2;
}
struct Extreme max(struct Extreme e1, struct Extreme e2) {
return e1.value > e2.value ? e1 : e2;
}
void readCommandLine(int argc, char *argv[]) {
size = (argc > 1) ? atoi(argv[1]) : MAXSIZE;
numWorkers = (argc > 2) ? atoi(argv[2]) : MAXWORKERS;
if (size > MAXSIZE)
size = MAXSIZE;
if (numWorkers > MAXWORKERS)
numWorkers = MAXWORKERS;
}
void initializeMatrix() {
for (int i = 0; i < size; i++) {
//printf("[ ");
for (int j = 0; j < size; j++) {
matrix[i][j] = rand() % MATRIX_LIMIT;
//printf(" %d", matrix[i][j]);
}
//printf(" ]\n");
}
}
double calculateAvg(const double v[PROGRAM_EXECUTIONS]) {
double avg = 0;
for(int i = 0; i < PROGRAM_EXECUTIONS; i++)
avg += v[i];
return avg / PROGRAM_EXECUTIONS;
}
void combinerMin(struct Extreme *out, struct Extreme *in) {
*out = out->value < in->value ? *out : *in;
}
void combinerMax(struct Extreme *out, struct Extreme *in) {
*out = out->value > in->value ? *out : *in;
}
// Read command line, initialize, and create threads
int main(int argc, char *argv[]) {
srand(time(0));
// Read command line args if any.
readCommandLine(argc, argv);
// Initialize the matrix
initializeMatrix();
// Store for each number of processors used, the times it took to execute each program.
// execution_times[i][j] = time (s) it took to execute with i processors program j.
// Note execution_times[0][i] is the sequential time of program execution i.
double execution_times[numWorkers][PROGRAM_EXECUTIONS];
for(int num_proc = 0; num_proc < numWorkers; num_proc++) {
// Set the number of threads for openMP.
omp_set_num_threads(num_proc + 1);
printf("\n\n============================| NUM PROCESSORS: %d |============================\n", num_proc + 1);
for (int num_prog = 0; num_prog < PROGRAM_EXECUTIONS; num_prog++) {
// Reset variables.
int total_sum = 0, i, j;
struct Extreme minimum, maximum;
minimum.value = MATRIX_LIMIT;
maximum.value = -MATRIX_LIMIT;
printf("\nProgram execution number %d: \n", num_prog + 1);
// Start timer
start_time = omp_get_wtime();
#pragma omp declare reduction(myMin : struct Extreme : combinerMin(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
#pragma omp declare reduction(myMax : struct Extreme : combinerMax(&omp_out, &omp_in)) initializer(omp_priv = omp_orig)
#pragma omp parallel for reduction (+:total_sum) reduction (myMin: minimum) reduction (myMax: maximum) private(j)
for (i = 0; i < size; i++) {
for (j = 0; j < size; j++) {
total_sum += matrix[i][j];
struct Extreme candidate = {.value = matrix[i][j], .pos_i = i, .pos_j = j};
minimum = min(minimum, candidate);
maximum = max(maximum, candidate);
}
} // Implicit barrier
end_time = omp_get_wtime();
execution_times[num_proc][num_prog] = end_time - start_time;
printf("The total sum is %d\n", total_sum);
printf("The minimum is %ld at (%d, %d)\n", minimum.value, minimum.pos_i, minimum.pos_j);
printf("The maximum is %ld at (%d, %d)\n", maximum.value, maximum.pos_i, maximum.pos_j);
printf("Sequential execution time: %f s\n", execution_times[0][num_prog]);
printf("Concurrent execution time: %f s\n", execution_times[num_proc][num_prog]);
}
double seq_avg = calculateAvg(execution_times[0]);
double exec_avg = calculateAvg(execution_times[num_proc]);
double speedup = (seq_avg / exec_avg) * 100;
printf("\nAverage sequential time (%d executions): %f s", PROGRAM_EXECUTIONS, seq_avg);
printf("\nAverage execution time (%d executions): %f s\n", PROGRAM_EXECUTIONS, exec_avg);
printf("Speedup: %.2f%%", speedup);
}
printf("\n\n");
}