I'm exploring OpenMP in Fortran, using the Mandelbrot algorithm as an example:
!$omp parallel do reduction(+:iters)
do iy=0,30
do ix=0,70
c = cmplx(xmin+stepx*ix, ymin+stepy*iy, qp)
z = 0
escaped = .false.
do k=1,max_iters
z = z**2 + c
escaped = abs(z) > 2
if (escaped) exit
end do
iters(iy, ix) = k
end do
end do
!$omp end parallel do
Each iteration is fully independent, so iters(iy, ix)
is fully defined for a particular iy, ix
pair, a perfect parallel case.
I've experimented a bit with the $omp parallel do
directive and reduction(+:iters)
is doing the trick.
But to be honest I don't fully understand if that's the best way to do it, since I'm not summing anything for the (+:iters)
.
In this case, is reduction (+:iters)
the correct directive?
OK, I can reproduce your behaviour once I write a little driver program - if I remove the reduce I get different answers when comparing a serial run and a threaded one. To resolve it I'm not going to mess around with the default scoping rules which have caused confusion in the comments and other answers - I'm going to use default( none )
and do it properly. The code below works with no reduction - please use default( none )
in the future, and also please provide a minimal, reproducible example in future questions as it makes it much, much easier to investigate problems. Also note I have swapped your x and y loops around in the parallel version, while of small import in this case this results in the efficient order of array accesses for a column major language like Fortran
ijb@ijb-Latitude-5410:~/work/stack$ cat mandb.f90
Program mandb
Use, Intrinsic :: iso_fortran_env, Only : wp => real64
!$ Use omp_lib, Only : omp_get_max_threads
Implicit None( Type, External )
Integer, Parameter :: nx = 70
Integer, Parameter :: ny = 30
Integer, Dimension( 0:ny, 0:nx ) :: iters_ser
Integer, Dimension( 0:ny, 0:nx ) :: iters_par
Integer, Parameter :: max_iters = 1000
Real( wp ), Parameter :: xmin = -2.00_wp
Real( wp ), Parameter :: xmax = +0.25_wp
Real( wp ), Parameter :: stepx = ( xmax - xmin ) / nx
Real( wp ), Parameter :: ymin = -1.2_wp
Real( wp ), Parameter :: ymax = +1.2_wp
Real( wp ), Parameter :: stepy = ( ymax - ymin ) / ny
Complex( wp ) :: c
Complex( wp ) :: z
Integer :: ix, iy
Integer :: k
Logical :: escaped
iters_ser = 0
do iy=0,30
do ix=0,70
c = cmplx(xmin+stepx*ix, ymin+stepy*iy, wp)
z = 0
escaped = .false.
do k=1,max_iters
z = z**2 + c
escaped = abs(z) > 2
if (escaped) exit
end do
iters_ser(iy, ix) = k
end do
end do
!$ Write( *, * ) 'On ', omp_get_max_threads(), ' threads'
iters_par = 0
!$omp parallel default( none ) &
!$omp shared ( iters_par ) &
!$omp private( ix, iy, c, z, k, escaped )
!$omp do
do ix=0,70
do iy=0,30
c = cmplx(xmin+stepx*ix, ymin+stepy*iy, wp)
z = 0
escaped = .false.
do k=1,max_iters
z = z**2 + c
escaped = abs(z) > 2
if (escaped) exit
end do
iters_par(iy, ix) = k
end do
end do
!$omp end do
!$omp end parallel
Write( *, * ) 'Max error: ', Maxval( iters_par - iters_ser )
End Program mandb
ijb@ijb-Latitude-5410:~/work/stack$ gfortran -Wall -Wextra -pedantic -std=f2018 -O -g -fopenmp mandb.f90
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 4 threads
Max error: 0
That said, I wouldn't code it like the above:
ijb@ijb-Latitude-5410:~/work/stack$ cat mandb2.f90
Program mandb
Use, Intrinsic :: iso_fortran_env, Only : wp => real64
Implicit None( Type, External )
Integer, Parameter :: nx = 70
Integer, Parameter :: ny = 30
Integer, Dimension( 0:ny, 0:nx ) :: iters_ser
Integer, Dimension( 0:ny, 0:nx ) :: iters_par
Real( wp ), Parameter :: xmin = -2.00_wp
Real( wp ), Parameter :: xmax = +0.25_wp
Real( wp ), Parameter :: ymin = -1.2_wp
Real( wp ), Parameter :: ymax = +1.2_wp
Call do_mandelbrot( xmin, xmax, ymin, ymax, iters_ser )
!$omp parallel default( none ) shared( iters_par )
Call do_mandelbrot( xmin, xmax, ymin, ymax, iters_par )
!$omp end parallel
Write( *, * ) 'Max error: ', Maxval( iters_par - iters_ser )
Contains
Subroutine do_mandelbrot( xmin, xmax, ymin, ymax, iters )
Use, Intrinsic :: iso_fortran_env, Only : wp => real64
!$ Use omp_lib, Only : omp_get_thread_num
!$ Use omp_lib, Only : omp_get_num_threads
Implicit None( Type, External )
Real( wp ) , Intent( In ) :: xmin, xmax
Real( wp ) , Intent( In ) :: ymin, ymax
Integer, Dimension( 0:, 0: ), Intent( Out ) :: iters
Integer, Parameter :: max_iters = 1000
Complex( wp ) :: c, z
Real( wp ) :: stepx, stepy
Integer :: nx, ny
Integer :: ix, iy
Integer :: k
Logical :: escaped
ny = Ubound( iters, Dim = 1 )
nx = Ubound( iters, Dim = 2 )
stepy = ( ymax - ymin ) / ny
stepx = ( xmax - xmin ) / nx
!$ If( omp_get_thread_num() == 0 ) Then
!$ Write( *, * ) 'On ', omp_get_num_threads(), ' threads'
!$ End If
!$omp do collapse( 2 ) schedule( dynamic )
do ix=0,nx
do iy=0,ny
c = cmplx(xmin+stepx*ix, ymin+stepy*iy, wp)
z = 0
escaped = .false.
iters( iy, ix ) = 0
do k=1,max_iters
z = z**2 + c
escaped = abs(z) > 2
if (escaped) exit
end do
iters(iy, ix) = k
end do
end do
!$omp end do
End Subroutine do_mandelbrot
End Program mandb
ijb@ijb-Latitude-5410:~/work/stack$ gfortran -Wall -Wextra -pedantic -std=f2018 -O -g -fopenmp mandb2.f90
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
On 1 threads
On 4 threads
Max error: 0
ijb@ijb-Latitude-5410:~/work/stack$