Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
E3SM
Kernels
Commits
11bad468
Commit
11bad468
authored
Dec 31, 2020
by
Youngsung Kim
Browse files
baseline performance is collected
parent
f790c860
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
ocn/gm_compute_Bolus_velocity/Makefile
View file @
11bad468
...
...
@@ -36,7 +36,7 @@ _PREPROCFLAG :=
# 1<= verbosity <= 3
VERBOSITY
?=
1
# repeat >= 1
REPEAT
?=
1
REPEAT
?=
1
#2
# skip sum check during data loading
SKIP_SUMCHECK
?=
...
...
@@ -69,7 +69,7 @@ else ifeq (${COMP}, crayftn)
else
ifeq
(${COMP}, pgfortran)
FC_0
:=
pgfortran
#FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess
#FC_FLAGS_SET_0 :=
-gopt
-i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess
FC_FLAGS_SET_0
:=
-gopt
-i4
-time
-Mstack_arrays
-Mextend
-byteswapio
-Mflushz
-Kieee
-Mallocatable
=
03
-O2
-Mpreprocess
-ta
=
tesla:debug,lineinfo
-Minfo
=
all,ccff
_PREPROCFLAG
:=
-Mpreprocess
...
...
@@ -170,11 +170,10 @@ ncu: ${OUTDIR}/${APP}.compute.ncu-rep
@
echo
"Output files are in
${OUTDIR}
"
${OUTDIR}/${APP}.compute.ncu-rep
:
${OUTDIR}/${APP}
#jsrun -n1 -c1 -g1 -a1 --smpiargs="-disable_gpu_hooks" -- ncu --target-processes=all -c 1500 --set=full --force-overwrite -o
${OUTDIR}
/
${APP}
.compute
${OUTDIR}
/
${APP}
jsrun
-n1
-c1
-g1
-a1
--smpiargs
=
"-disable_gpu_hooks"
--
ncu
--target-processes
=
all
--set
=
full
--force-overwrite
-o
${OUTDIR}
/
${APP}
.compute
${OUTDIR}
/
${APP}
${OUTDIR}/${APP}.systems.qdrep
:
${OUTDIR}/${APP}
jsrun
-n1
-c1
-g1
-a1
--
nsys profile
-o
${OUTDIR}
/
${APP}
.systems.qdrep
-f
true
-t
cuda,osrt,openacc
${OUTDIR}
/
${APP}
jsrun
-n1
-c1
-g1
-a1
--
nsys profile
-o
${OUTDIR}
/
${APP}
.systems.qdrep
-f
true
-t
cuda,osrt,openacc
${OUTDIR}
/
${APP}
||
true
${OUTDIR}/${APP}
:
build
...
...
@@ -185,7 +184,7 @@ run: build
${_MPIRUN}
./kernel.exe
mpas_ocn_gm.o
:
mpas_ocn_gm.f90 mpas_ocn_constants.o mpas_constants.o mpas_kind_types.o kgen_utils.o tprof_mod.o
${FC_0}
${FC_FLAGS_SET_0}
${_PREPROCFLAG}
-DMAX_TOL
=
${MAX_TOL}
-DVERBOSITY
=
${VERBOSITY}
-DNUM_REPEAT
=
${REPEAT}
-c
-o
$@
$
<
${FC_0}
${FC_FLAGS_SET_0}
${_PREPROCFLAG}
-DMAX_TOL
=
${MAX_TOL}
-DVERBOSITY
=
${VERBOSITY}
-DNUM_REPEAT
=
${REPEAT}
-c
-o
$@
$
^
mpas_ocn_constants.o
:
mpas_ocn_constants.f90 kgen_utils.o tprof_mod.o mpas_kind_types.o
${FC_0}
${FC_FLAGS_SET_0}
-c
-o
$@
$<
...
...
ocn/gm_compute_Bolus_velocity/compile.log
0 → 100644
View file @
11bad468
This diff is collapsed.
Click to expand it.
ocn/gm_compute_Bolus_velocity/kernel_driver.F90
View file @
11bad468
...
...
@@ -138,14 +138,14 @@
IF
(
kgen_case_count
==
0
)
THEN
WRITE
(
*
,
*
)
"No data file is verified."
ELSE
WRITE
(
*
,
"(4X, A36, A1, I6)"
)
"Total number of verification cases "
,
":"
,
kgen_case_count
WRITE
(
*
,
"(4X, A36, A1, I6)"
)
"Number of verification-passed cases "
,
":"
,
kgen_count_verified
WRITE
(
*
,
*
)
""
IF
(
kgen_case_count
==
kgen_count_verified
)
THEN
WRITE
(
*
,
"(4X,A)"
)
"kernel: ocn_gm_compute_Bolus_velocity: PASSED verification"
ELSE
WRITE
(
*
,
"(4X,A)"
)
"kernel: ocn_gm_compute_Bolus_velocity: FAILED verification"
END
IF
!
WRITE (*, "(4X, A36, A1, I6)") "Total number of verification cases ", ":", kgen_case_count
!
WRITE (*, "(4X, A36, A1, I6)") "Number of verification-passed cases ", ":", kgen_count_verified
!
WRITE (*, *) ""
!
IF (kgen_case_count == kgen_count_verified) THEN
!
WRITE (*, "(4X,A)") "kernel: ocn_gm_compute_Bolus_velocity: PASSED verification"
!
ELSE
!
WRITE (*, "(4X,A)") "kernel: ocn_gm_compute_Bolus_velocity: FAILED verification"
!
END IF
WRITE
(
*
,
*
)
""
WRITE
(
*
,
"(4X,A19,I3)"
)
"number of processes: "
,
mpisize
WRITE
(
*
,
*
)
""
...
...
ocn/gm_compute_Bolus_velocity/mpas_ocn_gm.f90
View file @
11bad468
This diff is collapsed.
Click to expand it.
ocn/gm_compute_Bolus_velocity/timing.log
0 → 100644
View file @
11bad468
CPU
****************************************************
kernel execution summary: ocn_gm_compute_Bolus_velocity
****************************************************
number of processes 1
Average call time (usec): 0.377E+05
Minimum call time (usec): 0.377E+05
Maximum call time (usec): 0.377E+05
****************************************************
GPU - baseline
****************************************************
kernel execution summary: ocn_gm_compute_Bolus_velocity
****************************************************
number of processes 1
Average call time (usec): 0.388E+05
Minimum call time (usec): 0.388E+05
Maximum call time (usec): 0.388E+05
****************************************************
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment