OpenMP: efficiency in reduction in large array

Is the following equivalent to your calculation ?

program main

    use iso_Fortran_env, only: rk => real64, ik => int32
    use omp_lib
    implicit none

    integer(ik) :: i, j, t, ipr
    real(rk), dimension(10, 10, 3) :: a
    real(rk), dimension(10) :: jgrid, tgrid, x
    real(rk), dimension(3, 3) :: pii
    real(rk), dimension(3) :: igrid
    real(rk) :: time0,time1, sec
    
    time0 = omp_get_wtime()
    sec = elapse_sec ()
    a     = 0.0_rk
    jgrid = 1.0_rk
    tgrid = 2.0_rk
    igrid = 3.0_rk
    pii = transpose(reshape([0.1_rk, 0.5_rk, 0.4_rk, &
                             0.3_rk, 0.6_rk, 0.1_rk, &
                             0.4_rk, 0.1_rk, 0.5_rk], [3, 3]))

    ! -------------------------------------------- !
    ! !$omp parallel do collapse(4) reduction(+:a) !
    ! -------------------------------------------- !
  !$omp parallel do PRIVATE (ipr,j,t,i,x) SHARED (a,igrid,pii,jgrid,tgrid)
     do ipr = 1, 3, 1
        do j = 1, 10, 1
            do t = 1, 10, 1
                x(t) = jgrid(j)*tgrid(t) * 3.
                do i = 1, 3, 1
!z                    a(t, j, ipr) =  a(t, j, ipr) + igrid(i)**pii(i, ipr) + jgrid(j)*tgrid(t)
                    x(t) = x(t) + igrid(i)**pii(i, ipr)
                enddo
            enddo
            a(:, j, ipr) =  x
        enddo
    enddo
  !$omp end parallel do

    write(*, *) sum(a)

    time1 = omp_get_wtime()
    sec = elapse_sec () - sec
    write(*, '(a, F12.6, a)') 'elapsed time: ', time1 - time0, ' seconds.'
    write (*,*) 'elapse', sec

  
   contains
     real*8 function elapse_sec ()
     integer*8 clock, rate
     call SYSTEM_CLOCK ( clock, rate )
       elapse_sec = dble(clock)/dble(rate)
     end function elapse_sec
end program main

omp_get_wtime is implemented poorly on the gfortran I use

1 Like