cmallocopenmp

How to do *user defined reduction* on *allocatable array* and *user reduction functions* with openMP in C?


I have written some programs with OMP reduction directive in Fortran and in C. But the data types were simple (int, float, arrays with fixed size, ...) and reduction-identifiers used were implicitly declared (+,*,- ...). Now, I would like to go a step beyond and understand deeply how OMP reduction works. I know only that a local copy is done and at the end of the parallel part, all the local copies are combined.

Considering the following single threaded program (This is a toy example) :

// Toy example : Non Parallel Version
#include <stdlib.h>
#include <stdio.h>

void my_add(int *res, int arr_sz) {
  for (int i=0; i<arr_sz; i++)
    res[i] += i;
}

int main(void) {

  int arr_sz=10; // Input parameter in real program
  int *my_array;
  my_array = malloc (sizeof(int)*arr_sz);
  for (int i=0; i<arr_sz; i++) my_array[i]=0; // init array
        
  for (int iloop=0; iloop<1024; iloop++) 
    my_add(my_array, arr_sz);

  for (int i=0; i<arr_sz; i++) printf("%d : %d\n",i,my_array[i]); // print array
  free(my_array); 

  return 0;
}

I would like to parallelize the for loop using a user defined reduction on an allocatable array and user reduction function. But I have many problems. This is an example of code to discuss (of course it does not compile):

// Toy example : Parallel Version
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>

void my_add(int *res, int arr_sz) {
  for (int i=0; i<arr_sz; i++)
    res[i] += i;
}



int main(void) {

  int arr_sz=10; // Input parameter in real program
  int *my_array;
  my_array = malloc (sizeof(int)*arr_sz);
  for (int i=0; i<arr_sz; i++) my_array[i]=0; // init array
  
  #pragma omp declare reduction (my_omp_red : int* : my_add(omp_out) ) initializer (omp_priv=omp_orig)
  // How to add parameters other than omp_out and omp_in in the reduction function ?
  // How to write the initializer, because my_array is a pointer that has been allocated ?
  #pragma omp parallel 
  {
  #pragma omp for reduction (my_omp_red:my_array)
  for (int iloop=0; iloop<1024; iloop++) {
    my_add(my_array, arr_sz);
  }
  
  } // End of omp parallel
  
  for (int i=0; i<arr_sz; i++) printf("%d : %d\n",i,my_array[i]); // print array
  free(my_array); 

  return 0;
}

The reduction specifier is my_omp_red, the typename-list is int* because I would like to work on a allocatable array.

First, I do not know how to write the combiner because there is a parameter (arr_sz) to the function (my_add) and it seems that only omp_out and omp_in are allowed. This is a toy example, but this question is more generic : How to write combiner to use functions with parameters ?

And, also, I do not know how to write the initializer, because my_array is only a pointer, not a array. So, I suppose the local copy will be a pointer and I imagine an allocation is needed in the parallel part of code ? But how to free the pointer ? Etc, etc ... To be honest, I am a little bit lost.

Thanks for help.

EDIT 1

I have written a code that answers to the questions above but without reduction directive. Instead, critical directive has been used and allocation of array has been done for each thread.

#include <stdlib.h>
#include <stdio.h>
#include <omp.h>

void my_add(int *res, int arr_sz) {
  for (int i=0; i<arr_sz; i++)
    res[i] += i;
}

int main(void) {

  int arr_sz=10; // Input parameter in real program
  int *my_array;
  my_array = malloc (sizeof(int)*arr_sz);
  for (int i=0; i<arr_sz; i++) my_array[i]=0; // init array
  
  #pragma omp parallel 
  {
  int *local_array = malloc (sizeof(int)*arr_sz);
  for (int i=0; i<arr_sz; i++) local_array[i]=0; // init local_array
  
  #pragma omp for
  for (int iloop=0; iloop<1024; iloop++) {
    my_add(local_array, arr_sz);
  }
  
  #pragma omp critical
  {
  for (int i=0; i<arr_sz; i++) my_array[i]+=local_array[i];
  }
  
  free(local_array);
  } // End of omp parallel
  
  for (int i=0; i<arr_sz; i++) printf("%d : %d\n",i,my_array[i]); // print array
  free(my_array); 

  return 0;
}

I wonder if this code could be write with the reduction directive. .


Solution

  • Combining dynamically allocated arrays is possible with normal reduction. A reduction in OpenMP always works on array elements. A user-defined reduction would be necessary, if you want to combine elements which are not base types, or if you want to use combine operations which don't match the provided operations.

    Here is the version of your code using reduction:

    #include <stdlib.h>
    #include <stdio.h>
    #include <omp.h>
    
    void my_add(int *res, int arr_sz) {
      for (int i=0; i<arr_sz; i++)
        res[i] += i;
    }
    
    int main(void) {
    
      int arr_sz=10; // Input parameter in real program
      int *my_array;
      my_array = malloc (sizeof(int)*arr_sz);
      for (int i=0; i<arr_sz; i++) my_array[i]=0; // init array
      
      #pragma omp parallel for reduction(+:my_array[:arr_sz])
      for (int iloop=0; iloop<1024; iloop++) {
        my_add(my_array, arr_sz);
      }
      
      for (int i=0; i<arr_sz; i++) printf("%d : %d\n",i,my_array[i]); // print array
      free(my_array); 
    
      return 0;
    }