Commit 3bbfbbbf authored by Youngsung Kim's avatar Youngsung Kim
Browse files

initial port to openacc: ocn/gm_compute_Bolus_velocity

parent 56400f5e
*.o
*.mod
*.exe
*.swp
......@@ -69,7 +69,8 @@ else ifeq (${COMP}, crayftn)
else ifeq (${COMP}, pgfortran)
FC_0 := pgfortran
FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess
#FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess
FC_FLAGS_SET_0 := -i4 -time -Mstack_arrays -Mextend -byteswapio -Mflushz -Kieee -Mallocatable=03 -O2 -Mpreprocess -ta=tesla -Minfo=all,ccff
_PREPROCFLAG := -Mpreprocess
else
......
ocn_gm_compute_Bolus_velocity.60.0.1.dat
ocn_gm_compute_Bolus_velocity.63.0.1.dat
ocn_gm_compute_Bolus_velocity.61.0.1.dat
ocn_gm_compute_Bolus_velocity.30.0.1.dat
ocn_gm_compute_Bolus_velocity.56.0.1.dat
ocn_gm_compute_Bolus_velocity.44.0.1.dat
ocn_gm_compute_Bolus_velocity.19.0.1.dat
ocn_gm_compute_Bolus_velocity.17.0.1.dat
ocn_gm_compute_Bolus_velocity.57.0.1.dat
ocn_gm_compute_Bolus_velocity.47.0.1.dat
ocn_gm_compute_Bolus_velocity.39.0.1.dat
ocn_gm_compute_Bolus_velocity.51.0.1.dat
ocn_gm_compute_Bolus_velocity.8.0.1.dat
ocn_gm_compute_Bolus_velocity.5.0.1.dat
ocn_gm_compute_Bolus_velocity.45.0.1.dat
ocn_gm_compute_Bolus_velocity.54.0.1.dat
ocn_gm_compute_Bolus_velocity.33.0.1.dat
ocn_gm_compute_Bolus_velocity.53.0.1.dat
ocn_gm_compute_Bolus_velocity.9.0.1.dat
ocn_gm_compute_Bolus_velocity.1.0.1.dat
ocn_gm_compute_Bolus_velocity.14.0.1.dat
ocn_gm_compute_Bolus_velocity.4.0.1.dat
ocn_gm_compute_Bolus_velocity.11.0.1.dat
ocn_gm_compute_Bolus_velocity.50.0.1.dat
ocn_gm_compute_Bolus_velocity.29.0.1.dat
ocn_gm_compute_Bolus_velocity.6.0.1.dat
ocn_gm_compute_Bolus_velocity.28.0.1.dat
ocn_gm_compute_Bolus_velocity.18.0.1.dat
ocn_gm_compute_Bolus_velocity.3.0.1.dat
ocn_gm_compute_Bolus_velocity.43.0.1.dat
ocn_gm_compute_Bolus_velocity.32.0.1.dat
ocn_gm_compute_Bolus_velocity.25.0.1.dat
ocn_gm_compute_Bolus_velocity.55.0.1.dat
ocn_gm_compute_Bolus_velocity.48.0.1.dat
ocn_gm_compute_Bolus_velocity.13.0.1.dat
ocn_gm_compute_Bolus_velocity.23.0.1.dat
ocn_gm_compute_Bolus_velocity.20.0.1.dat
ocn_gm_compute_Bolus_velocity.7.0.1.dat
ocn_gm_compute_Bolus_velocity.21.0.1.dat
ocn_gm_compute_Bolus_velocity.16.0.2.dat
ocn_gm_compute_Bolus_velocity.60.0.1.dat
ocn_gm_compute_Bolus_velocity.63.0.1.dat
ocn_gm_compute_Bolus_velocity.61.0.1.dat
ocn_gm_compute_Bolus_velocity.30.0.1.dat
ocn_gm_compute_Bolus_velocity.56.0.1.dat
ocn_gm_compute_Bolus_velocity.44.0.1.dat
ocn_gm_compute_Bolus_velocity.19.0.1.dat
ocn_gm_compute_Bolus_velocity.17.0.1.dat
ocn_gm_compute_Bolus_velocity.57.0.1.dat
ocn_gm_compute_Bolus_velocity.47.0.1.dat
ocn_gm_compute_Bolus_velocity.39.0.1.dat
ocn_gm_compute_Bolus_velocity.51.0.1.dat
ocn_gm_compute_Bolus_velocity.8.0.1.dat
ocn_gm_compute_Bolus_velocity.5.0.1.dat
ocn_gm_compute_Bolus_velocity.45.0.1.dat
ocn_gm_compute_Bolus_velocity.54.0.1.dat
ocn_gm_compute_Bolus_velocity.33.0.1.dat
ocn_gm_compute_Bolus_velocity.53.0.1.dat
ocn_gm_compute_Bolus_velocity.9.0.1.dat
ocn_gm_compute_Bolus_velocity.1.0.1.dat
ocn_gm_compute_Bolus_velocity.14.0.1.dat
ocn_gm_compute_Bolus_velocity.4.0.1.dat
ocn_gm_compute_Bolus_velocity.11.0.1.dat
ocn_gm_compute_Bolus_velocity.50.0.1.dat
ocn_gm_compute_Bolus_velocity.29.0.1.dat
ocn_gm_compute_Bolus_velocity.6.0.1.dat
ocn_gm_compute_Bolus_velocity.28.0.1.dat
ocn_gm_compute_Bolus_velocity.18.0.1.dat
ocn_gm_compute_Bolus_velocity.3.0.1.dat
ocn_gm_compute_Bolus_velocity.43.0.1.dat
ocn_gm_compute_Bolus_velocity.32.0.1.dat
ocn_gm_compute_Bolus_velocity.25.0.1.dat
ocn_gm_compute_Bolus_velocity.55.0.1.dat
ocn_gm_compute_Bolus_velocity.48.0.1.dat
ocn_gm_compute_Bolus_velocity.13.0.1.dat
ocn_gm_compute_Bolus_velocity.23.0.1.dat
ocn_gm_compute_Bolus_velocity.20.0.1.dat
ocn_gm_compute_Bolus_velocity.7.0.1.dat
ocn_gm_compute_Bolus_velocity.21.0.1.dat
ocn_gm_compute_Bolus_velocity.16.0.2.dat
......@@ -51,6 +51,9 @@ subroutine kgen_array_sumcheck(varname, sum1, sum2, finish)
logical, intent(in), optional :: finish
logical checkresult
! prevent sumcheck failure with very small value
return
if ( SKIP_SUMCHECK ) then
return
end if
......
......@@ -336,23 +336,32 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
!call to kgen kernel
!$acc data copyin(nvertlevels)
nCells = nCellsArray( size(nCellsArray) )
nEdges = nEdgesArray( size(nEdgesArray) )
! Assign a huge value to the scratch variables which may manifest itself when
! there is a bug.
!$omp do schedule(runtime) private(k)
!print *, "NEDGES", nEdges
!print *, "NVERTLEVELS", nvertlevels
!$acc parallel loop gang
do iEdge = 1, nEdges
!$acc loop vector
do k = 1, nVertLevels
gradDensityEdge(k, iEdge) = huge(0D0)
gradZMidEdge(k, iEdge) = huge(0D0)
normalGMBolusVelocity(k, iEdge) = 0.0_RKIND
end do
end do
!$acc end parallel
!$omp end do
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iEdge = 1, nEdges
!$acc loop vector
do k = 1, nVertLevels + 1
gradDensityTopOfEdge(k, iEdge) = huge(0D0)
dDensityDzTopOfEdge(k, iEdge) = huge(0D0)
......@@ -361,10 +370,13 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
relativeSlopeTapering(k, iEdge) = 0.0_RKIND
end do
end do
!$acc end parallel
!$omp end do
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iCell = 1, nCells + 1
!$acc loop vector
do k = 1, nVertLevels
dDensityDzTopOfCell(k, iCell) = huge(0D0)
k33(k, iCell) = 0.0_RKIND
......@@ -372,6 +384,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
relativeSlopeTaperingCell(k, iCell) = 0.0_RKIND
end do
end do
!$acc end parallel
!$omp end do
!--------------------------------------------------------------------
......@@ -386,7 +399,10 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
! Note that displacedDensity is used from the upper cell, so that the EOS reference level for
! pressure is the same for both displacedDensity(k-1,iCell) and density(k,iCell).
!$omp do schedule(runtime) private(k, rtmp)
! verification failure
!!$acc parallel loop gang
do iCell = 1, nCells
!!$acc loop vector
do k = 2, maxLevelCell(iCell)
rtmp = (displacedDensity(k-1,iCell) - density(k,iCell)) / (zMid(k-1,iCell) - zMid(k,iCell))
dDensityDzTopOfCell(k,iCell) = min(rtmp, -epsGM)
......@@ -398,19 +414,23 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
dDensityDzTopOfCell(1,iCell) = 0.0_RKIND
dDensityDzTopOfCell(maxLevelCell(iCell)+1,iCell) = 0.0_RKIND
end do
!!$acc end parallel
!$omp end do
nEdges = nEdgesArray( 3 )
! Interpolate dDensityDzTopOfCell to edge and layer interface
!$omp do schedule(runtime) private(k, cell1, cell2)
!$acc parallel loop gang
do iEdge = 1, nEdges
!$acc loop vector
do k = 1, maxLevelEdgeTop(iEdge)+1
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
dDensityDzTopOfEdge(k,iEdge) = 0.5_RKIND * (dDensityDzTopOfCell(k,cell1) + dDensityDzTopOfCell(k,cell2))
end do
end do
!$acc end parallel
!$omp end do
!--------------------------------------------------------------------
......@@ -426,24 +446,31 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
! along the constant coordinate surface.
! The computed variables lives at edge and mid-layer depth
!$omp do schedule(runtime) private(cell1, cell2, k)
! verification failure
!!$acc parallel loop gang
do iEdge = 1, nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
!!$acc loop vector
do k=1,maxLevelEdgeTop(iEdge)
gradDensityEdge(k,iEdge) = (density(k,cell2) - density(k,cell1)) / dcEdge(iEdge)
gradZMidEdge(k,iEdge) = (zMid(k,cell2) - zMid(k,cell1)) / dcEdge(iEdge)
end do
end do
!!$acc end parallel
!$omp end do
nEdges = nEdgesArray( 3 )
! Interpolate gradDensityEdge and gradZMidEdge to layer interface
!$omp do schedule(runtime) private(k, h1, h2)
! verification failure
!!$acc parallel loop gang
do iEdge = 1, nEdges
! The interpolation can only be carried out on non-boundary edges
if (maxLevelEdgeTop(iEdge) .GE. 1) then
!!$acc loop vector
do k = 2, maxLevelEdgeTop(iEdge)
h1 = layerThicknessEdge(k-1,iEdge)
h2 = layerThicknessEdge(k,iEdge)
......@@ -461,6 +488,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
gradZMidTopOfEdge(maxLevelEdgeTop(iEdge)+1,iEdge) = gradZMidEdge(maxLevelEdgeTop(iEdge),iEdge)
end if
end do
!!$acc end parallel
!$omp end do
!--------------------------------------------------------------------
......@@ -472,14 +500,17 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
nEdges = nEdgesArray( 3 )
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iEdge = 1, nEdges
if (maxLevelEdgeTop(iEdge) .GE. 1) then
!$acc loop vector
do k = 1, maxLevelEdgeTop(iEdge)+1
gradDensityConstZTopOfEdge(k,iEdge) = gradDensityTopOfEdge(k,iEdge) - dDensityDzTopOfEdge(k,iEdge) &
* gradZMidTopOfEdge(k,iEdge)
end do
end if
end do
!$acc end parallel
!$omp end do
!--------------------------------------------------------------------
......@@ -494,10 +525,12 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
! Compute relativeSlopeTopOfEdge at edge and layer interface
! set relativeSlopeTopOfEdge to zero for horizontal land/water edges.
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iEdge = 1, nEdges
relativeSlopeTopOfEdge(:, iEdge) = 0.0_RKIND
! Beside a full land cell (e.g. missing cell) maxLevelEdgeTop=0, so relativeSlopeTopOfEdge at that edge will remain zero.
!$acc loop vector
do k = 2, maxLevelEdgeTop(iEdge)
relativeSlopeTopOfEdge(k,iEdge) = - gradDensityTopOfEdge(k,iEdge) / min(dDensityDzTopOfEdge(k,iEdge),-epsGM)
end do
......@@ -513,6 +546,8 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
relativeSlopeTopOfEdge( maxLevelEdgeTop(iEdge)+1, iEdge ) = relativeSlopeTopOfEdge( max(1,maxLevelEdgeTop(iEdge)), iEdge )
end do
!$acc end parallel
!$omp do schedule(runtime) private(k)
!$omp end do
nEdges = nEdgesArray( 3 )
......@@ -520,11 +555,14 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
! slope can be unbounded in regions of neutral stability, reset to the large, but bounded, value
! values is hardwrite to 1.0, this is equivalent to a slope of 45 degrees
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iEdge = 1, nEdges
!$acc loop vector
do k = 1, nVertLevels
relativeSlopeTopOfEdge(k, iEdge) = max( min( relativeSlopeTopOfEdge(k, iEdge), 1.0_RKIND), -1.0_RKIND)
end do
end do
!$acc end parallel
!$omp end do
! average relative slope to cell centers
......@@ -533,6 +571,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
nCells = nCellsArray( 2 )
!$omp do schedule(runtime) private(i, iEdge, areaEdge, rtmp, k)
!$acc parallel loop gang
do iCell = 1, nCells
areaCellSum(:, iCell) = 1.0e-34_RKIND
do i = 1, nEdgesOnCell(iCell)
......@@ -540,6 +579,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
!contribution of cell area from this edge * 2.0
areaEdge = 0.5_RKIND * dcEdge(iEdge) * dvEdge(iEdge)
!$acc loop vector
do k = 1, maxLevelEdgeTop(iEdge)
rtmp = areaEdge * relativeSlopeTopOfEdge(k, iEdge)**2
relativeSlopeTopOfCell(k, iCell) = relativeSlopeTopOfCell(k, iCell) + rtmp
......@@ -547,16 +587,20 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
end do
end do
end do
!$acc end parallel
!$omp end do
nCells = nCellsArray( 2 )
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iCell=1,nCells
!$acc loop vector
do k = 1, maxLevelCell(iCell)
relativeSlopeTopOfCell(k,iCell) = sqrt( relativeSlopeTopOfCell(k,iCell)/areaCellSum(k,iCell) )
end do
end do
!$acc end parallel
!$omp end do
! Compute tapering function
......@@ -565,13 +609,17 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
nCells = nCellsArray( size(nCellsArray) )
!$omp do schedule(runtime)
! verification failure
!!$acc parallel loop gang
do iCell = 1, nCells
k33(:, iCell) = 0.0_RKIND
end do
!!$acc end parallel
!$omp end do
! use relativeSlopeTaperingCell as a temporary space for smoothing of relativeSlopeTopOfCell
relativeSlopeTaperingCell = relativeSlopeTopOfCell
!!$acc parallel loop gang
do iter = 1, 5
nCells = nCellsArray( 2 )
......@@ -580,6 +628,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
do iCell=1,nCells
relativeSlopeTaperingCell(1, iCell) = 0.0_RKIND
relativeSlopeTaperingCell(maxLevelCell(iCell):nVertLevels, iCell) = 0.0_RKIND
!!$acc loop vector
do k = 2, maxLevelCell(iCell)-1
rtmp = relativeSlopeTopOfCell(k-1,iCell) + relativeSlopeTopOfCell(k+1,iCell)
stmp = 2.0_RKIND*relativeSlopeTopOfCell(k,iCell)
......@@ -589,15 +638,20 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
end do
!$omp end do
end do ! iter
!!$acc end parallel
nCells = nCellsArray ( 2 )
! first, compute tapering across full domain based on a maximum allowable slope
!$omp do schedule(runtime) private(k)
!!$acc parallel loop gang
do iCell=1,nCells
! compilation error
!!$acc loop vector
do k = 1, maxLevelCell(iCell)
relativeSlopeTaperingCell(k,iCell) = min(1.0_RKIND, config_max_relative_slope / (relativeSlopeTopOfCell(k,iCell)+epsGM))
end do
end do
!!$acc end parallel
!$omp end do
! now further taper in the boundary layer
......@@ -606,7 +660,10 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
if(config_use_Redi_surface_layer_tapering) then
nCells = nCellsArray ( 2 )
!$omp do schedule(runtime) private(k, rtmp)
!!$acc parallel loop gang
do iCell=1,nCells
! compilation error
!!$acc loop vector
do k = 1, maxLevelCell(iCell)
rtmp = -zMid(k,iCell)/max(config_Redi_surface_layer_tapering_extent,boundaryLayerDepth(iCell)+epsGM)
rtmp = max(0.0_RKIND,rtmp)
......@@ -614,6 +671,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
relativeSlopeTaperingCell(k,iCell) = rtmp*relativeSlopeTaperingCell(k,iCell)
end do
end do
!!$acc end parallel
!$omp end do
endif ! config_use_Redi_surface_layer_tapering
......@@ -636,34 +694,42 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
nCells = nCellsArray( 2 )
!$omp do schedule(runtime) private(k)
!$acc parallel loop gang
do iCell=1,nCells
k33(:, iCell) = 0.0_RKIND
!$acc loop vector
do k = 2, maxLevelCell(iCell)
k33(k,iCell) = ( relativeSlopeTaperingCell(k,iCell) * relativeSlopeTopOfCell(k,iCell) )**2
end do
end do
!$acc end parallel
!$omp end do
nEdges = nEdgesArray( 3 )
! average tapering function to layer edges
!$omp do schedule(runtime) private(cell1, cell2, k)
!$acc parallel loop gang
do iEdge = 1, nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
!$acc loop vector
do k = 1, maxLevelEdgeTop(iEdge)
relativeSlopeTapering(k,iEdge) = 0.5_RKIND * (relativeSlopeTaperingCell(k,cell1) + relativeSlopeTaperingCell(k,cell2))
enddo
enddo
!$acc end parallel
!$omp end do
! allow disabling of K33 for testing
if(config_disable_redi_k33) then
nCells = nCellsArray( size(nCellsArray) )
!$omp do schedule(runtime)
!$acc parallel loop gang
do iCell = 1, nCells
k33(:, iCell) = 0.0_RKIND
end do
!$acc end parallel
!$omp end do
end if
......@@ -675,6 +741,8 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
if (config_gm_lat_variable_c2) then
!$omp do schedule(runtime) private(cell1, cell2, sumN2, ltSum, countN2, BruntVaisalaFreqTopEdge)
! compilation error
!!$acc parallel loop gang
do iEdge = 1, nEdges
cell1 = cellsOnEdge(1,iEdge)
cell2 = cellsOnEdge(2,iEdge)
......@@ -682,6 +750,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
ltSum = 0.0
countN2 = 0
!!$acc loop vector
do k=2,maxLevelEdgeTop(iEdge)
BruntVaisalaFreqTopEdge = 0.5_RKIND * (BruntVaisalaFreqTop(k,cell1) + BruntVaisalaFreqTop(k,cell2))
......@@ -696,6 +765,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
if(countN2 > 0) cGMphaseSpeed(iEdge) = max(config_gm_min_phase_speed ,sqrt(sumN2/ltSum)*ltSum / 3.141592_RKIND)
enddo
!!$acc end parallel
!$omp end do
else
......@@ -1441,6 +1511,7 @@ SUBROUTINE ocn_gm_compute_bolus_velocity(kgen_unit, kgen_measure, kgen_isverifie
!$omp end do
!$acc end data
!$kgen end_callsite
......@@ -2187,4 +2258,3 @@ END SUBROUTINE ocn_gm_compute_bolus_velocity
end module ocn_gm
!|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
! vim: foldmethod=marker
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment