Loading main_cudaStreamAddCallback.F90 +10 −4 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ contains use iso_c_binding use omp_lib implicit none type(c_ptr),value :: stream type(c_ptr) :: stream integer(C_INT),value :: fstatus type(C_PTR),value :: event integer(kind=omp_event_handle_kind), pointer :: f_event Loading Loading @@ -158,6 +158,8 @@ program CudaStreamAddCallback_detach temp1 = omp_get_wtime() - temp1 write(6,*) "After map",temp1 !$OMP TARGET DATA USE_DEVICE_PTR(ubuf) !$OMP PARALLEL DEFAULT(NONE) PRIVATE(temp1, ierr, dpitch, spitch, & !$OMP width, height, sizeBytes, i1, i2, iny1, ip) SHARED(h2d_event, vxz, ubuf, nx, & !$OMP ny, nz, np, zero, ptr_callback, h2d_stream, ibuf, nbuf, next, & Loading Loading @@ -190,7 +192,7 @@ program CudaStreamAddCallback_detach if (ierr .gt. 0) write(6,*) "ERROR: MemCpy2DAsync ip, ierr = ", 1, ierr ierr = cudaStreamAddCallback (h2d_stream, ptr_callback, C_LOC(h2d_event), zero) if (ierr .gt. 0) write(6,*) "ERROR: LaunchHostFunc ip, ierr = ", 1, ierr if (ierr .gt. 0) write(6,*) "ERROR: StreamAddCallback ip, ierr = ", 1, ierr write(6,*) "After Add callback",1 !!call flush(6) flush(6) Loading Loading @@ -234,7 +236,7 @@ program CudaStreamAddCallback_detach if (ierr .gt. 0) write(6,*) "ERROR: MemCpy2DAsync ip, ierr = ", i1, ierr ierr = cudaStreamAddCallback (h2d_stream, ptr_callback, C_LOC(h2d_event), zero) if (ierr .gt. 0) write(6,*) "ERROR: LaunchHostFunc ip, ierr = ", i1, ierr if (ierr .gt. 0) write(6,*) "ERROR: StreamAddCallback ip, ierr = ", i1, ierr write(6,*) "After Add callback",i1 !!call flush(6) flush(6) Loading @@ -249,6 +251,8 @@ program CudaStreamAddCallback_detach end do !$OMP TASKWAIT do ip=1,np !$OMP TASK DEPEND(IN:ubuf(:,:,:,ip)) write(6,*) "Copy task complete, ip=",ip Loading @@ -260,7 +264,7 @@ program CudaStreamAddCallback_detach !$OMP TASKWAIT !$OMP TARGET UPDATE TO(ubuf) !!$OMP TARGET UPDATE TO(ubuf) !$OMP END SINGLE Loading @@ -268,6 +272,8 @@ program CudaStreamAddCallback_detach !$OMP END TARGET DATA !$OMP END TARGET DATA !! error check inyi = ny/np do ip = 1,nbuf Loading setUpModules_gcc.sh +7 −5 Original line number Diff line number Diff line Loading @@ -5,7 +5,8 @@ USE_MPI=0 if [ ${USE_MPI} -eq 0 ]; then # use next two lines for GCC with OpenMP Offload module use /sw/summit/modulefiles/ums/stf010/Core module load gcc/11.1.0-20220305 #module load gcc/11.1.0-20220305 module load gcc/11.1.0-latest module load cuda/11.0.3 else Loading @@ -17,8 +18,9 @@ else # then prepend bin and and LD_LIBRARY_PATH GCC_UMS_DIR=/sw/summit/ums/stf010/gcc #latest=$(ls --color=never ${GCC_UMS_DIR} | tail -n1) latest="11.1.0-20220305" latest=$(ls --color=never ${GCC_UMS_DIR} | tail -n1) #latest="11.1.0-20220305" #latest="11.1.0-latest" export GCC_ROOT=$GCC_UMS_DIR/$latest echo "Using GCC in $GCC_ROOT" Loading @@ -30,8 +32,8 @@ else export OMPI_FC=${GCC_ROOT}/bin/gfortran export LD_LIBRARY_PATH=${GCC_ROOT}/lib64:${LD_LIBRARY_PATH} #gfortran --version #mpif90 --version gfortran --version mpif90 --version fi Loading Loading
main_cudaStreamAddCallback.F90 +10 −4 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ contains use iso_c_binding use omp_lib implicit none type(c_ptr),value :: stream type(c_ptr) :: stream integer(C_INT),value :: fstatus type(C_PTR),value :: event integer(kind=omp_event_handle_kind), pointer :: f_event Loading Loading @@ -158,6 +158,8 @@ program CudaStreamAddCallback_detach temp1 = omp_get_wtime() - temp1 write(6,*) "After map",temp1 !$OMP TARGET DATA USE_DEVICE_PTR(ubuf) !$OMP PARALLEL DEFAULT(NONE) PRIVATE(temp1, ierr, dpitch, spitch, & !$OMP width, height, sizeBytes, i1, i2, iny1, ip) SHARED(h2d_event, vxz, ubuf, nx, & !$OMP ny, nz, np, zero, ptr_callback, h2d_stream, ibuf, nbuf, next, & Loading Loading @@ -190,7 +192,7 @@ program CudaStreamAddCallback_detach if (ierr .gt. 0) write(6,*) "ERROR: MemCpy2DAsync ip, ierr = ", 1, ierr ierr = cudaStreamAddCallback (h2d_stream, ptr_callback, C_LOC(h2d_event), zero) if (ierr .gt. 0) write(6,*) "ERROR: LaunchHostFunc ip, ierr = ", 1, ierr if (ierr .gt. 0) write(6,*) "ERROR: StreamAddCallback ip, ierr = ", 1, ierr write(6,*) "After Add callback",1 !!call flush(6) flush(6) Loading Loading @@ -234,7 +236,7 @@ program CudaStreamAddCallback_detach if (ierr .gt. 0) write(6,*) "ERROR: MemCpy2DAsync ip, ierr = ", i1, ierr ierr = cudaStreamAddCallback (h2d_stream, ptr_callback, C_LOC(h2d_event), zero) if (ierr .gt. 0) write(6,*) "ERROR: LaunchHostFunc ip, ierr = ", i1, ierr if (ierr .gt. 0) write(6,*) "ERROR: StreamAddCallback ip, ierr = ", i1, ierr write(6,*) "After Add callback",i1 !!call flush(6) flush(6) Loading @@ -249,6 +251,8 @@ program CudaStreamAddCallback_detach end do !$OMP TASKWAIT do ip=1,np !$OMP TASK DEPEND(IN:ubuf(:,:,:,ip)) write(6,*) "Copy task complete, ip=",ip Loading @@ -260,7 +264,7 @@ program CudaStreamAddCallback_detach !$OMP TASKWAIT !$OMP TARGET UPDATE TO(ubuf) !!$OMP TARGET UPDATE TO(ubuf) !$OMP END SINGLE Loading @@ -268,6 +272,8 @@ program CudaStreamAddCallback_detach !$OMP END TARGET DATA !$OMP END TARGET DATA !! error check inyi = ny/np do ip = 1,nbuf Loading
setUpModules_gcc.sh +7 −5 Original line number Diff line number Diff line Loading @@ -5,7 +5,8 @@ USE_MPI=0 if [ ${USE_MPI} -eq 0 ]; then # use next two lines for GCC with OpenMP Offload module use /sw/summit/modulefiles/ums/stf010/Core module load gcc/11.1.0-20220305 #module load gcc/11.1.0-20220305 module load gcc/11.1.0-latest module load cuda/11.0.3 else Loading @@ -17,8 +18,9 @@ else # then prepend bin and and LD_LIBRARY_PATH GCC_UMS_DIR=/sw/summit/ums/stf010/gcc #latest=$(ls --color=never ${GCC_UMS_DIR} | tail -n1) latest="11.1.0-20220305" latest=$(ls --color=never ${GCC_UMS_DIR} | tail -n1) #latest="11.1.0-20220305" #latest="11.1.0-latest" export GCC_ROOT=$GCC_UMS_DIR/$latest echo "Using GCC in $GCC_ROOT" Loading @@ -30,8 +32,8 @@ else export OMPI_FC=${GCC_ROOT}/bin/gfortran export LD_LIBRARY_PATH=${GCC_ROOT}/lib64:${LD_LIBRARY_PATH} #gfortran --version #mpif90 --version gfortran --version mpif90 --version fi Loading