Commit b382ba2f authored by Budiardja, Reuben's avatar Budiardja, Reuben
Browse files

Updated the Reconstruction_Kernel test case; reproduced the slowness issue with offload under CCE

parent 4a3f3b74
# Remove compiled module (*.mod) and object (*.o) files.
clear:
rm -f *.mod *.o
# Select the Fortran compiler command and its OpenMP flags from the value of
# GENASIS_MACHINE. FC is only assigned here for Cray_CCE and POWER_XL.
ifeq ($(GENASIS_MACHINE), Cray_CCE)
FC=ftn -fopenmp
endif
ifeq ($(GENASIS_MACHINE), POWER_XL)
FC=xlf2008_r -qsmp=omp -qoffload
endif
# Compile both sources with $(FC) and link them into an executable whose
# name carries the GENASIS_MACHINE suffix.
Reconstruction_Kernel_Test: \
Reconstruction_Kernel_Test.f90 \
Reconstruction_Kernel.f90
$(FC) -c Reconstruction_Kernel_Test.f90
$(FC) -c Reconstruction_Kernel.f90
$(FC) Reconstruction_Kernel_Test.o Reconstruction_Kernel.o \
-o Reconstruction_Kernel_Test_$(GENASIS_MACHINE)
# Remove compiled module files, object files, and any files matching *.acc.*
clean:
rm -f *.mod *.o *.acc.*
......@@ -10,7 +10,6 @@ contains
integer ( KDI ) :: &
iS, &
iF, &
iF_R, &
iV, jV, kV
integer ( KDI ), dimension ( 3 ) :: &
iaS, &
......@@ -50,19 +49,18 @@ contains
if ( UseDevice ) then
!$OMP target teams distribute parallel do simd collapse ( 4 ) &
!$OMP target teams distribute parallel do collapse ( 4 ) &
!$OMP schedule ( static, 1 ) &
!$OMP private ( iF, iF_R, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( iF, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( xAM, xAC, xAP, x2AM, x2AC, x2AP, xI, xO, xE ) &
!$OMP private ( c0, c1, c2, c2_S, d ) &
!$OMP firstprivate ( SqrtTiny )
!$OMP firstprivate ( SqrtTiny, iaS )
do iS = 1, size ( iaSlctd )
do kV = lV ( 3 ), uV ( 3 )
do jV = lV ( 2 ), uV ( 2 )
do iV = lV ( 1 ), uV ( 1 )
iF = iaSlctd ( iS )
iF_R = iaSlctd_R ( iS )
iF = iaSlctd ( iS )
iaVP = [ iV, jV, kV ] + iaS
iaVM = [ iV, jV, kV ] - iaS
......@@ -75,9 +73,9 @@ contains
xAC = XA ( iV, jV, kV )
xAP = XA ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ) )
x2AM = X2A ( iaVM ( 1 ), iaVM ( 2 ), iaVM ( 3 ) )
x2AC = X2A ( iV, jV, kV )
x2AP = X2A ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ) )
x2AM = XA ( iaVM ( 1 ), iaVM ( 2 ), iaVM ( 3 ) ) ** 2
x2AC = X ( iV, jV, kV ) ** 2
x2AP = X ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ) ) ** 2
xI = X ( iV, jV, kV ) - 0.5 * dX ( iV, jV, kV )
xO = X ( iV, jV, kV ) + 0.5 * dX ( iV, jV, kV )
......@@ -217,10 +215,10 @@ contains
end if !-- Local extremum
F_IR ( iV, jV, kV, iF_R ) &
F_IR ( iV, jV, kV, iS ) &
= c0 + c1 * xI + c2 * xI**2
F_IL ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ), iF_R ) &
F_IL ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ), iS ) &
= c0 + c1 * xO + c2 * xO**2
!call Show ( '>>> Final values' )
......@@ -231,23 +229,22 @@ contains
end do !-- jV
end do !-- kV
end do !-- iS
!$OMP end target teams distribute parallel do simd
!$OMP end target teams distribute parallel do
else !-- use host
!$OMP parallel do collapse ( 4 ) &
!$OMP schedule ( runtime ) &
!$OMP private ( iF, iF_R, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( iF, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( xAM, xAC, xAP, x2AM, x2AC, x2AP, xI, xO, xE ) &
!$OMP private ( c0, c1, c2, c2_S, d ) &
!$OMP firstprivate ( SqrtTiny )
!$OMP firstprivate ( SqrtTiny, iaS )
do iS = 1, size ( iaSlctd )
do kV = lV ( 3 ), uV ( 3 )
do jV = lV ( 2 ), uV ( 2 )
do iV = lV ( 1 ), uV ( 1 )
iF = iaSlctd ( iS )
iF_R = iaSlctd_R ( iS )
iF = iaSlctd ( iS )
iaVP = [ iV, jV, kV ] + iaS
iaVM = [ iV, jV, kV ] - iaS
......@@ -402,10 +399,10 @@ contains
end if !-- Local extremum
F_IR ( iV, jV, kV, iF_R ) &
F_IR ( iV, jV, kV, iS ) &
= c0 + c1 * xI + c2 * xI**2
F_IL ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ), iF_R ) &
F_IL ( iaVP ( 1 ), iaVP ( 2 ), iaVP ( 3 ), iS ) &
= c0 + c1 * xO + c2 * xO**2
!call Show ( '>>> Final values' )
......
......@@ -7,8 +7,19 @@ module Reconstruction_Form
KDR = kind ( 1.0d0 ), &
KDL = kind ( .true. )
integer, parameter :: &
N_CELLS = 8, &
!-- Change N_CELLS to adjust the problem size, and
! N_COMPUTES to adjust the number of times the kernel is run.
!-- A 'typical' run would have N_CELLS = 128 or 256, and
! 6 reconstruction kernels per time step ( 1 per dim x 2 for 2nd order ),
! with a total of 1 million time steps ( e.g. a typical supernova
! simulation ).
integer ( KDI ), parameter :: &
N_CELLS = 64, &
N_COMPUTES = 10
integer ( KDI ), parameter :: &
N_FIELDS = 2, &
N_GHOSTS = 2
......@@ -65,6 +76,7 @@ module Reconstruction_Form
contains
subroutine Initialize ( R )
class ( ReconstructionForm ), intent ( inout ) :: &
......@@ -74,6 +86,8 @@ contains
iC, &
nWavelength
real ( KDR ) :: &
Pi, &
TwoPi, &
Offset, &
Amplitude
real ( KDR ), dimension ( 3 ) :: &
......@@ -100,24 +114,25 @@ contains
associate ( &
dX => R % Width ( 1, 1, 1 ), &
X => R % Center, &
Pi => acos ( -1.0_KDR ), &
TwoPi => 2.0_KDR * acos ( -1.0_KDR ) )
X => R % Center )
Pi = acos ( -1.0_KDR )
TwoPi = 2.0_KDR * acos ( -1.0_KDR )
X = spread ( spread ( [ ( ( dX / 2 ) + ( dX * ( iC - 3 ) ), &
iC = 1, N_CELLS + 4 ) ], &
dim = 2, ncopies = size ( X, dim = 2 ) ), &
dim = 3, ncopies = size ( R % Center, dim = 3 ) )
associate &
( Y => reshape ( R % Center, shape ( X ), order = [ 2, 3, 1 ] ), &
Z => reshape ( R % Center, shape ( X ), order = [ 3, 2, 1 ] ), &
F1 => R % Field ( :, :, :, 1 ), &
F2 => R % Field ( :, :, :, 2 ) )
call Show_3D_R ( X, 'Center_X' )
call Show_3D_R ( Y, 'Center_Y' )
call Show_3D_R ( Z, 'Center_Z' )
!call Show_3D_R ( X, 'Center_X' )
!call Show_3D_R ( Y, 'Center_Y' )
!call Show_3D_R ( Z, 'Center_Z' )
!-- Set fields
K = 1.0_KDR
......@@ -130,8 +145,8 @@ contains
+ Amplitude * 2.0_KDR &
* sin ( Pi * ( K ( 1 ) * X + K ( 2 ) * Y + K ( 3 ) * Z ) )
call Show_3D_R ( F1, 'SetWave' )
call Show_3D_R ( F2, 'SetWave_2' )
!call Show_3D_R ( F1, 'SetWave' )
!call Show_3D_R ( F2, 'SetWave_2' )
end associate !-- X, Y, Z
......@@ -183,10 +198,12 @@ contains
end subroutine Show_3D_R
subroutine Compute ( R )
subroutine Compute ( R, UseDeviceOption )
class ( ReconstructionForm ), intent ( inout ) :: &
R
logical ( KDL ), intent ( in ), optional :: &
UseDeviceOption
integer ( KDI ) :: &
iDimension
......@@ -196,7 +213,7 @@ contains
( R % Field, R % Center, R % Width, &
R % Average_1_U, R % Average_2_U, R % iaSelected, &
R % iaSelected, iDimension, N_GHOSTS, &
R % Field_IL, R % Field_IR, UseDeviceOption = .true. )
R % Field_IL, R % Field_IR, UseDeviceOption = UseDeviceOption )
end subroutine Compute
......@@ -206,14 +223,43 @@ end module Reconstruction_Form
!-- Timing driver for the reconstruction kernel. After initialization it
!   calls RF % Compute N_COMPUTES times with UseDeviceOption = .true. and
!   then N_COMPUTES times with UseDeviceOption = .false., and prints the
!   wall-clock time of each loop as measured by OMP_GET_WTIME.
program Reconstruction_Form_Test

  use OMP_LIB, only: OMP_GET_WTIME, OMP_GET_MAX_THREADS
  use Reconstruction_Form

  implicit none

  !-- The number of kernel invocations is the module parameter N_COMPUTES;
  !   no separate local copy is kept, so there is a single place to change it.
  integer ( KDI ) :: &
    iC
  real ( KDR ) :: &
    TimeStart, &
    TimeStop
  type ( ReconstructionForm ) :: &
    RF

  call RF % Initialize ( )

  !-- Untimed call made before either timing loop; UseDeviceOption is
  !   not supplied here.
  call RF % Compute ( )

  !-- Timed loop with UseDeviceOption = .true.
  TimeStart = OMP_GET_WTIME ( )
  do iC = 1, N_COMPUTES
    call RF % Compute ( UseDeviceOption = .true. )
  end do
  TimeStop = OMP_GET_WTIME ( )

  print '(a30, i5)', 'N_CELLS : ', N_CELLS
  print '(a30, i5)', 'N_COMPUTES : ', N_COMPUTES
  print*
  print '(a30, es15.6e3)', 'Kernel Offload Timing (s) : ', TimeStop - TimeStart
  print*

  !-- Timed loop with UseDeviceOption = .false.
  TimeStart = OMP_GET_WTIME ( )
  do iC = 1, N_COMPUTES
    call RF % Compute ( UseDeviceOption = .false. )
  end do
  TimeStop = OMP_GET_WTIME ( )

  print '(a30, i5)', 'CPU nThreads : ', OMP_GET_MAX_THREADS ( )
  print '(a30, es15.6e3)', 'Kernel CPU Timing (s) : ', TimeStop - TimeStart

end program Reconstruction_Form_Test
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment