I must be doing something wrong.
This is a quicksort example adapted
from "Using OpenMP - the Next Step" p118 onwards.
The program does a serial quicksort, and then
a parallel version with tasks + checks.
No speedup on any number of threads with gfortran9 or ifort18.
Please advise.
Anton
! Quicksort
module m
use, intrinsic :: iso_fortran_env
use :: omp_lib
implicit none
contains
integer function part_ind( a, start, end )
real, intent(inout) :: a(:)
integer, intent(in) :: start, end
integer :: pind, i, sind
real :: pval
! write (*,*) omp_get_thread_num()
pind = (start+end)/2
pval = a( pind )
call swap( a(end), a(pind) )
sind = start
do i=start, end-1
if ( a(i) .lt. pval ) then
call swap( a(i), a(sind) )
sind = sind + 1
end if
! write (*,*) i, a
end do
call swap( a(sind), a(end) )
part_ind = sind
end function part_ind
subroutine swap( a, b )
real, intent(inout) :: a, b
real :: tmp
tmp = a
a = b
b = tmp
end subroutine swap
recursive subroutine qsort( a, start, end )
real, intent(inout) :: a(:)
integer, intent(in) :: start, end
integer :: index
if ( start .lt. end ) then
index = part_ind( a, start, end )
call qsort( a, start, index-1 )
call qsort( a, index+1, end )
end if
end subroutine qsort
recursive subroutine omp_qsort( a, start, end )
real, intent(inout) :: a(:)
integer, intent(in) :: start, end
integer, parameter :: cutoff = 10
integer :: index
if ( start .lt. end ) then
index = part_ind( a, start, end )
!omp task final( (index-start) < cutoff ) mergeable &
!omp default(none) shared(a) firstprivate(start, index)
call omp_qsort( a, start, index-1 )
!omp end task
!omp task final( (end-index) < cutoff ) mergeable &
!omp default(none) shared(a) firstprivate(index, end)
call omp_qsort( a, index+1, end )
!omp end task
end if
end subroutine omp_qsort
end module m
program p
use :: m
use, intrinsic :: iso_fortran_env
use :: omp_lib
implicit none
integer, parameter :: arrsize = 2**25
real :: a( arrsize ), a_save( arrsize ), time1, time2
integer :: index, i
! Enable nested parallelism
!call omp_set_nested( .true. )
! Establish an array
call random_number( a )
! write (*,*) a
! Preserve the original array
a_save = a
! Sort
call cpu_time(time1)
call qsort( a, 1, arrsize )
call cpu_time(time2)
! write (*,*) a, "dt:", time2-time1
write (*,*) "Serial dt:", time2-time1
! Check
do i=1,arrsize-1
if ( a(i+1) .lt. a(i) ) then
write (*,"(3(a,i0))") "ERROR: not sorted: a(", i, ") > a(", i+1, ")"
end if
end do
call cpu_time(time1)
!$omp parallel default(none) shared( a_save )
!$omp single
call omp_qsort( a_save, 1, arrsize)
!$omp end single nowait
!$omp end parallel
call cpu_time(time2)
write (*,*) "OMP dt:", time2-time1
if ( any(a .ne. a_save) ) write (*,*) "ERROR: serial version does not match OpenMP"
end program p