Commit f790c860 authored by Youngsung Kim
Browse files

collected profile data

parent 60c72012
......@@ -70,7 +70,7 @@ else ifeq (${COMP}, pgfortran)
FC_0 := pgfortran
#FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess
FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess -ta=tesla -Minfo=all,ccff
FC_FLAGS_SET_0 := -gopt -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess -ta=tesla:debug,lineinfo -Minfo=all,ccff
_PREPROCFLAG := -Mpreprocess
else
......@@ -154,6 +154,30 @@ help:
# Object files linked into the standalone kernel executable.
ALL_OBJS := mpas_ocn_gm.o mpas_ocn_constants.o mpas_constants.o mpas_kind_types.o kernel_driver.o kgen_utils.o tprof_mod.o
# Name of the executable; also the prefix of every profiler report.
APP := kernel.exe
# Directory that receives the executable and the reports. CURDIR is set by
# make itself, so it stays correct under `make -C <dir>`, unlike the PWD
# value inherited from the environment.
OUTDIR := ${CURDIR}

# Command-style goals: none of these names is a file written by a recipe.
.PHONY: all zip nsys ncu build

# Collect both the Nsight Systems and the Nsight Compute reports.
all: nsys ncu

# Bundle both reports together with the Fortran sources found in OUTDIR.
# The archive is gzip-compressed (-z) even though it carries a .tar suffix.
# `&&` keeps tar from running in the wrong directory when the cd fails.
zip: all
	cd ${OUTDIR} && tar -cvzf ${APP}.tar ${APP}.systems.qdrep ${APP}.compute.ncu-rep *.f90 *.F90

# Convenience goals that produce one report each.
nsys: ${OUTDIR}/${APP}.systems.qdrep
	@echo "Output files are in ${OUTDIR}"

ncu: ${OUTDIR}/${APP}.compute.ncu-rep
	@echo "Output files are in ${OUTDIR}"

# Nsight Compute report; `-o` is given without the .ncu-rep suffix.
# A previous variant of this command also passed `-c 1500` right after
# --target-processes=all.
${OUTDIR}/${APP}.compute.ncu-rep: ${OUTDIR}/${APP}
	jsrun -n1 -c1 -g1 -a1 --smpiargs="-disable_gpu_hooks" -- ncu --target-processes=all --set=full --force-overwrite -o ${OUTDIR}/${APP}.compute $<

# Nsight Systems report tracing CUDA, OS runtime and OpenACC activity.
${OUTDIR}/${APP}.systems.qdrep: ${OUTDIR}/${APP}
	jsrun -n1 -c1 -g1 -a1 -- nsys profile -o $@ -f true -t cuda,osrt,openacc $<

# `build` is kept as an alias for callers that already use it.
build: ${OUTDIR}/${APP}

# Link the executable as a real file target, so the profiler rules above
# rerun only when the binary actually changed (use `make -B` to force a
# fresh profile).
${OUTDIR}/${APP}: ${ALL_OBJS}
	${FC_0} ${FC_FLAGS_SET_0} -o $@ $^
......@@ -182,4 +206,4 @@ tprof_mod.o: tprof_mod.f90
${FC_0} ${FC_FLAGS_SET_0} -c -o $@ $<
# Remove the executable, Fortran module files, the object files, the
# profiler reports and any tar archive in the current directory.
# Note that `*.tar` matches every tar file here, not only the one built by
# the `zip` goal.
.PHONY: clean
clean:
	rm -f kernel.exe *.mod ${ALL_OBJS} *.qdrep *.ncu-rep *.tar
#!/usr/bin/bash
# Loads the four environment modules named below.
# NOTE(review): `module load` normally changes only the shell that runs it,
# so this file presumably has to be sourced rather than executed - confirm
# the intended usage.

# Presumably provides the pgfortran compiler - confirm on the target system.
module load pgi
module load cuda
# Presumably provide the `nsys` and `ncu` profilers - confirm on the target
# system.
module load nsight-systems
module load nsight-compute
......@@ -14,7 +14,7 @@
!
module ocn_gm
!use nvtx
USE mpas_constants
USE ocn_constants
......@@ -321,6 +321,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
!$kgen begin_callsite ocn_gm_compute_Bolus_velocity
!call nvtxStartRange("First label")
IF (kgen_evalstage) THEN
......@@ -918,6 +919,9 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
gmStreamFuncTopOfCell(:, iCell) = gmStreamFuncTopOfCell(:,iCell) / areaCell(iCell)
end do
!!$acc end parallel
!call nvtxEndRange
IF (kgen_mainstage) THEN
!verify init
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment