Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Budiardja, Reuben
Fortran Frontier
Commits
b382ba2f
Commit
b382ba2f
authored
Sep 30, 2021
by
Budiardja, Reuben
Browse files
Updated Reconstruction_Kernel test case; reproduced the slowness issue with offload when using CCE
parent
4a3f3b74
Changes
3
Show whitespace changes
Inline
Side-by-side
Cases/Composite/ReconstructionKernel_OpenMP_Offload/Makefile
View file @
b382ba2f
clear
:
rm
-f
*
.mod
*
.o
ifeq
($(GENASIS_MACHINE), Cray_CCE)
FC
=
ftn
-fopenmp
endif
ifeq
($(GENASIS_MACHINE), POWER_XL)
FC
=
xlf2008_r
-qsmp
=
omp
-qoffload
endif
Reconstruction_Kernel_Test
:
\
Reconstruction_Kernel_Test.f90
\
Reconstruction_Kernel.f90
$(FC)
-c
Reconstruction_Kernel_Test.f90
$(FC)
-c
Reconstruction_Kernel.f90
$(FC)
Reconstruction_Kernel_Test.o Reconstruction_Kernel.o
\
-o
Reconstruction_Kernel_Test_
$(GENASIS_MACHINE)
clean
:
rm
-f
*
.mod
*
.o
*
.acc.
*
Cases/Composite/ReconstructionKernel_OpenMP_Offload/Reconstruction_Kernel.f90
View file @
b382ba2f
...
...
@@ -10,7 +10,6 @@ contains
integer
(
KDI
)
::
&
iS
,
&
iF
,
&
iF_R
,
&
iV
,
jV
,
kV
integer
(
KDI
),
dimension
(
3
)
::
&
iaS
,
&
...
...
@@ -50,19 +49,18 @@ contains
if
(
UseDevice
)
then
!$OMP target teams distribute parallel do
simd
collapse ( 4 ) &
!$OMP target teams distribute parallel do collapse ( 4 ) &
!$OMP schedule ( static, 1 ) &
!$OMP private ( iF,
iF_R,
iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( iF, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( xAM, xAC, xAP, x2AM, x2AC, x2AP, xI, xO, xE ) &
!$OMP private ( c0, c1, c2, c2_S, d ) &
!$OMP firstprivate ( SqrtTiny )
!$OMP firstprivate ( SqrtTiny
, iaS
)
do
iS
=
1
,
size
(
iaSlctd
)
do
kV
=
lV
(
3
),
uV
(
3
)
do
jV
=
lV
(
2
),
uV
(
2
)
do
iV
=
lV
(
1
),
uV
(
1
)
iF
=
iaSlctd
(
iS
)
iF_R
=
iaSlctd_R
(
iS
)
iaVP
=
[
iV
,
jV
,
kV
]
+
iaS
iaVM
=
[
iV
,
jV
,
kV
]
-
iaS
...
...
@@ -75,9 +73,9 @@ contains
xAC
=
XA
(
iV
,
jV
,
kV
)
xAP
=
XA
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
)
)
x2AM
=
X
2
A
(
iaVM
(
1
),
iaVM
(
2
),
iaVM
(
3
)
)
x2AC
=
X
2A
(
iV
,
jV
,
kV
)
x2AP
=
X
2A
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
)
)
x2AM
=
XA
(
iaVM
(
1
),
iaVM
(
2
),
iaVM
(
3
)
)
**
2
x2AC
=
X
(
iV
,
jV
,
kV
)
**
2
x2AP
=
X
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
)
)
**
2
xI
=
X
(
iV
,
jV
,
kV
)
-
0.5
*
dX
(
iV
,
jV
,
kV
)
xO
=
X
(
iV
,
jV
,
kV
)
+
0.5
*
dX
(
iV
,
jV
,
kV
)
...
...
@@ -217,10 +215,10 @@ contains
end
if
!-- Local extremum
F_IR
(
iV
,
jV
,
kV
,
i
F_R
)
&
F_IR
(
iV
,
jV
,
kV
,
i
S
)
&
=
c0
+
c1
*
xI
+
c2
*
xI
**
2
F_IL
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
),
i
F_R
)
&
F_IL
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
),
i
S
)
&
=
c0
+
c1
*
xO
+
c2
*
xO
**
2
!call Show ( '>>> Final values' )
...
...
@@ -231,23 +229,22 @@ contains
end
do
!-- jV
end
do
!-- kV
end
do
!-- iS
!$OMP end target teams distribute parallel do
simd
!$OMP end target teams distribute parallel do
else
!-- use host
!$OMP parallel do collapse ( 4 ) &
!$OMP schedule ( runtime ) &
!$OMP private ( iF,
iF_R,
iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( iF, iaVP, iaVM, fM, fC, fP, fI, fO ) &
!$OMP private ( xAM, xAC, xAP, x2AM, x2AC, x2AP, xI, xO, xE ) &
!$OMP private ( c0, c1, c2, c2_S, d ) &
!$OMP firstprivate ( SqrtTiny )
!$OMP firstprivate ( SqrtTiny
, iaS
)
do
iS
=
1
,
size
(
iaSlctd
)
do
kV
=
lV
(
3
),
uV
(
3
)
do
jV
=
lV
(
2
),
uV
(
2
)
do
iV
=
lV
(
1
),
uV
(
1
)
iF
=
iaSlctd
(
iS
)
iF_R
=
iaSlctd_R
(
iS
)
iaVP
=
[
iV
,
jV
,
kV
]
+
iaS
iaVM
=
[
iV
,
jV
,
kV
]
-
iaS
...
...
@@ -402,10 +399,10 @@ contains
end
if
!-- Local extremum
F_IR
(
iV
,
jV
,
kV
,
i
F_R
)
&
F_IR
(
iV
,
jV
,
kV
,
i
S
)
&
=
c0
+
c1
*
xI
+
c2
*
xI
**
2
F_IL
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
),
i
F_R
)
&
F_IL
(
iaVP
(
1
),
iaVP
(
2
),
iaVP
(
3
),
i
S
)
&
=
c0
+
c1
*
xO
+
c2
*
xO
**
2
!call Show ( '>>> Final values' )
...
...
Cases/Composite/ReconstructionKernel_OpenMP_Offload/Reconstruction_Kernel_Test.f90
View file @
b382ba2f
...
...
@@ -7,8 +7,19 @@ module Reconstruction_Form
KDR
=
kind
(
1.0d0
),
&
KDL
=
kind
(
.true.
)
integer
,
parameter
::
&
N_CELLS
=
8
,
&
!-- Change N_CELLS to adjust the problem size, and
! N_COMPUTES to adjust the number of times the kernel is run.
!-- A 'typical' run would have N_CELLS = 128 or 256, and
! 6 reconstruction kernels per time step ( 1 per dim x 2 for 2nd order ),
! with a total of 1 million time steps ( e.g. a typical supernova
! simulation ).
integer
(
KDI
),
parameter
::
&
N_CELLS
=
64
,
&
N_COMPUTES
=
10
integer
(
KDI
),
parameter
::
&
N_FIELDS
=
2
,
&
N_GHOSTS
=
2
...
...
@@ -65,6 +76,7 @@ module Reconstruction_Form
contains
subroutine
Initialize
(
R
)
class
(
ReconstructionForm
),
intent
(
inout
)
::
&
...
...
@@ -74,6 +86,8 @@ contains
iC
,
&
nWavelength
real
(
KDR
)
::
&
Pi
,
&
TwoPi
,
&
Offset
,
&
Amplitude
real
(
KDR
),
dimension
(
3
)
::
&
...
...
@@ -100,9 +114,10 @@ contains
associate
(
&
dX
=>
R
%
Width
(
1
,
1
,
1
),
&
X
=>
R
%
Center
,
&
Pi
=>
acos
(
-1.0_KDR
),
&
TwoPi
=>
2.0_KDR
*
acos
(
-1.0_KDR
)
)
X
=>
R
%
Center
)
Pi
=
acos
(
-1.0_KDR
)
TwoPi
=
2.0_KDR
*
acos
(
-1.0_KDR
)
X
=
spread
(
spread
(
[
(
(
dX
/
2
)
+
(
dX
*
(
iC
-
3
)
),
&
iC
=
1
,
N_CELLS
+
4
)
],
&
...
...
@@ -115,9 +130,9 @@ contains
F1
=>
R
%
Field
(
:,
:,
:,
1
),
&
F2
=>
R
%
Field
(
:,
:,
:,
2
)
)
call
Show_3D_R
(
X
,
'Center_X'
)
call
Show_3D_R
(
Y
,
'Center_Y'
)
call
Show_3D_R
(
Z
,
'Center_Z'
)
!
call Show_3D_R ( X, 'Center_X' )
!
call Show_3D_R ( Y, 'Center_Y' )
!
call Show_3D_R ( Z, 'Center_Z' )
!-- Set fields
K
=
1.0_KDR
...
...
@@ -130,8 +145,8 @@ contains
+
Amplitude
*
2.0_KDR
&
*
sin
(
Pi
*
(
K
(
1
)
*
X
+
K
(
2
)
*
Y
+
K
(
3
)
*
Z
)
)
call
Show_3D_R
(
F1
,
'SetWave'
)
call
Show_3D_R
(
F2
,
'SetWave_2'
)
!
call Show_3D_R ( F1, 'SetWave' )
!
call Show_3D_R ( F2, 'SetWave_2' )
end
associate
!-- X, Y, Z
...
...
@@ -183,10 +198,12 @@ contains
end
subroutine
Show_3D_R
subroutine
Compute
(
R
)
subroutine
Compute
(
R
,
UseDeviceOption
)
class
(
ReconstructionForm
),
intent
(
inout
)
::
&
R
logical
(
KDL
),
intent
(
in
),
optional
::
&
UseDeviceOption
integer
(
KDI
)
::
&
iDimension
...
...
@@ -196,7 +213,7 @@ contains
(
R
%
Field
,
R
%
Center
,
R
%
Width
,
&
R
%
Average_1_U
,
R
%
Average_2_U
,
R
%
iaSelected
,
&
R
%
iaSelected
,
iDimension
,
N_GHOSTS
,
&
R
%
Field_IL
,
R
%
Field_IR
,
UseDeviceOption
=
.true.
)
R
%
Field_IL
,
R
%
Field_IR
,
UseDeviceOption
=
UseDeviceOption
)
end
subroutine
Compute
...
...
@@ -206,14 +223,43 @@ end module Reconstruction_Form
program
Reconstruction_Form_Test
use
OMP_LIB
use
Reconstruction_Form
implicit
none
integer
(
KDI
)
::
&
iC
,
&
nComputes
=
10
real
(
KDR
)
::
&
TimeStart
,
&
TimeStop
type
(
ReconstructionForm
)
::
&
RF
call
RF
%
Initialize
(
)
call
RF
%
Compute
(
)
TimeStart
=
OMP_GET_WTIME
(
)
do
iC
=
1
,
N_COMPUTES
call
RF
%
Compute
(
UseDeviceOption
=
.true.
)
end
do
TimeStop
=
OMP_GET_WTIME
(
)
print
'(a30, i5)'
,
'N_CELLS : '
,
N_CELLS
print
'(a30, i5)'
,
'N_COMPUTES : '
,
N_COMPUTES
print
*
print
'(a30, es15.6e3)'
,
'Kernel Offload Timing (s) : '
,
TimeStop
-
TimeStart
print
*
TimeStart
=
OMP_GET_WTIME
(
)
do
iC
=
1
,
N_COMPUTES
call
RF
%
Compute
(
UseDeviceOption
=
.false.
)
end
do
TimeStop
=
OMP_GET_WTIME
(
)
print
'(a30, i5)'
,
'CPU nThreads : '
,
OMP_GET_MAX_THREADS
(
)
print
'(a30, es15.6e3)'
,
'Kernel CPU Timing (s) : '
,
TimeStop
-
TimeStart
end
program
Reconstruction_Form_Test
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment