Commit 1896d83e authored by Juan Caneses Marin (nfc)'s avatar Juan Caneses Marin (nfc)
Browse files

Succesfully parallized test_spline with up to 32 cores leading to x16 speedup....

Succesfully parallized test_spline with up to 32 cores leading to x16 speedup. Needed to find replacement for fitpack to achieve that. Fitpack does not perform will with openMP. foudn that all intrinsic functions do speed up well with openMP
parent d48980f9
Loading
Loading
Loading
Loading
+117 −4
Original line number Diff line number Diff line
@@ -466,8 +466,8 @@ REAL(r8) FUNCTION Interp1(xq, spline0)
  REAL(r8) :: curv2

  ! Output data:
  Interp1 = curv2(xq,spline0%n,spline0%x,spline0%y,spline0%yp,spline0%sigma)
  !Interp1 = 1.
  !Interp1 = curv2(xq,spline0%n,spline0%x,spline0%y,spline0%yp,spline0%sigma)
  Interp1 = COS(xq)

  RETURN
END FUNCTION Interp1
@@ -488,8 +488,121 @@ REAL(r8) FUNCTION diff1(xq, spline0)
  REAL(r8) :: curvd

  ! Output data:
  diff1 = curvd(xq,spline0%n,spline0%x,spline0%y,spline0%yp,spline0%sigma)
  ! diff1 = 0.1
  !diff1 = curvd(xq,spline0%n,spline0%x,spline0%y,spline0%yp,spline0%sigma)
  diff1 = COS(xq)

 RETURN
END FUNCTION diff1

! =======================================================================================================
! source: http://web.gps.caltech.edu/~cdp/cloudmodel/Current/util/
   SUBROUTINE spline(x, y, n, yp1, ypn, y2)
!   use nrtype
!
! Given arrays x(1:n) and y(1:n) containing a tabulated function, i.e.
! y(i)=f(x(i)), with x(1)<x(2)<...<x(n), and given values yp1 and ypn for
! the first derivative of the interpolating function at points 1 and n,
! respectively, this routine returns an array y2(1:n) of length n which
! contains the second derivatives of the interpolating function at the
! tabulated points x(i).  If yp1 and/or ypn are equal to 1.e30 or larger,
! the routine is signaled to set the corresponding boundary condition for a
! natural spline with zero second derivative on that boundary.
! Parameter: nmax is the largest anticipiated value of n
! (adopted from Numerical Recipes in FORTRAN 77)
!
   INTEGER, PARAMETER :: DP=KIND(1.0D0)
   INTEGER:: n
   INTEGER, PARAMETER:: nmax=500
   REAL(DP):: yp1, ypn, x(n), y(n), y2(n)
   INTEGER:: i, k
   REAL(DP):: p, qn, sig, un, u(nmax)

     if (yp1.gt..99e30) then
        y2(1)=0.
        u(1)=0.
     else
        y2(1)=-0.5
        u(1)=(3./(x(2)-x(1)))*((y(2)-y(1))/(x(2)-x(1))-yp1)
     endif

     do i=2, n-1
        sig=(x(i)-x(i-1))/(x(i+1)-x(i-1))
        p=sig*y2(i-1)+2.
        y2(i)=(sig-1.)/p
        u(i)=(6.*((y(i+1)-y(i))/(x(i+1)-x(i))-(y(i)-y(i-1))/&
             & (x(i)-x(i-1)))/(x(i+1)-x(i-1))-sig*u(i-1))/p
     enddo

     if (ypn.gt..99e30) then
        qn=0.
        un=0.
     else
        qn=0.5
        un=(3./(x(n)-x(n-1)))*(ypn-(y(n)-y(n-1))/(x(n)-x(n-1)))
     endif

     y2(n)=(un-qn*u(n-1))/(qn*y2(n-1)+1.)

     do k=n-1, 1, -1
        y2(k)=y2(k)*y2(k+1)+u(k)
     enddo

     return
     END


! =======================================================================================================
! source: http://web.gps.caltech.edu/~cdp/cloudmodel/Current/util/
   SUBROUTINE splint(xa, ya, y2a, n, x, y)
!   USE nrtype
!
! Given the arrays xa(1:n) and ya(1:n) of length n, which tabulate a function
! (with the xa(i) in order), and given the array y2a(1:n), which is the output
! from the subroutine spline, and given a value of x, this routine returns a
! cubic spline interpolated value y.
! (adopted from Numerical Recipes in FORTRAN 77)
!
   INTEGER, PARAMETER :: DP = KIND(1.0D0)
   INTEGER:: n
   REAL(DP):: x, y, xa(n), y2a(n), ya(n)
   INTEGER:: k, khi, klo
   REAL(DP):: a, b, h

     klo=1
     khi=n
1   if (khi-klo.gt.1) then
        k=(khi+klo)/2
        if (xa(k).gt.x) then
           khi=k
        else
           klo=k
        endif
        goto 1
     endif

     h=xa(khi)-xa(klo)
     if (h.eq.0.) pause 'bad xa input in splint'

     a=(xa(khi)-x)/h
     b=(x-xa(klo))/h
     y=a*ya(klo)+b*ya(khi)+((a**3-a)*y2a(klo)+(b**3-b)*y2a(khi))*(h**2)/6.

     return
     END

! =======================================================================================================
SUBROUTINE diff(x,y,n,dy)
USE local
  
IMPLICIT NONE
REAL(r8), DIMENSION(n) :: x, y, dy
INTEGER(i4) :: n, i

do i=1,(n-1)
 dy(i) = (y(i+1) - y(i))/(x(i+1) - x(i))
end do

dy(n) = dy(n-1)  

RETURN
END SUBROUTINE diff
+1 −1
Original line number Diff line number Diff line
@@ -362,7 +362,7 @@ TimeStepping: do j = 1,in%Nsteps
    ! =====================================================================
    id = OMP_GET_THREAD_NUM()
    if (id .EQ. 0) then
      if (j .EQ. 50) then
      if (j .EQ. 150) then
	       oend_estimate = OMP_GET_WTIME()
         WRITE(*,*) 'Estimated compute time: ', in%Nsteps*(oend_estimate-ostart)/j,' [s]'
      end if
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ INPUT_FILE="xp_IonSlowingDown.in"

# Set number of threads:
# ===================================================
export OMP_NUM_THREADS=4
export OMP_NUM_THREADS=32

# Set processor binding for openMP:
# ===================================================
+11 −6
Original line number Diff line number Diff line
@@ -3,15 +3,21 @@
# Select input file:
# ===================================================
INPUT_FILE="Bfield_b.txt"
NZ_STR=501
NZ=501

# Select number of query points for interpolation:
# ===================================================
NQ=1000000

# Set number of threads:
# ===================================================
OMP_NUM_THREADS=4
export OMP_NUM_THREADS=32

# Set processor binding for openMP:
# ===================================================
OMP_PROC_BIND=true
export OMP_PROC_BIND=true
export OMP_WAIT_POLICY=active
export OMP_DYNAMIC=false

# Get repo directory:
# ===================================================
@@ -26,9 +32,8 @@ INPUT_FILE_DIR=$REPO_DIR/BfieldData/$INPUT_FILE
export REPO_DIR
export INPUT_FILE_DIR
export INPUT_FILE
export NZ_STR
export OMP_NUM_THREADS
export OMP_PROC_BIND
export NZ
export NQ

# Compile source code:
# ===================================================
+109 −8
Original line number Diff line number Diff line
@@ -14,16 +14,20 @@ IMPLICIT NONE
! Define local variables:
! ==============================================================================
! User-defined structures:
TYPE(splTYP) :: spline_Bz
TYPE(spltestTYP) :: spline_Test
TYPE(splTYP) :: spline_Bz, spline_dBz
! User-defined functions:
REAL(r8) :: Interp1, diff1
REAL(r8) :: curv2, curvd
! DO loop indices:
INTEGER(i4) :: i,j
INTEGER(i4) :: nz
! Pseudo random number seed:
INTEGER(i4) :: seed_size
INTEGER(i4), DIMENSION(:), ALLOCATABLE :: seed
INTEGER(i4) :: nz, nq
! Pseudo random number:
REAL(r8) :: R
REAL(r8) :: dummy1, dummy2
! Data for interpolation:
REAL(R8) :: x1, x2
REAL(r8), DIMENSION(:), ALLOCATABLE :: x , Bz , dBz
REAL(r8), DIMENSION(:), ALLOCATABLE :: xq, Bzq, dBzq
! OPENMP:
INTEGER(i4) :: id, numThreads
DOUBLE PRECISION :: ostart, oend, oend_estimate
@@ -32,7 +36,7 @@ CHARACTER*300 :: command, mpwd
INTEGER(i4) :: n_mpwd, STATUS
CHARACTER*300 :: rootDir, inputFileDir, dir0, dir1
CHARACTER*300 ::  inputFile, fileName, xpSelector
CHARACTER*300 ::  nz_str, numThreads_str
CHARACTER*300 ::  nz_str, numThreads_str,nq_str

! Read data from shell:
! ==============================================================================
@@ -40,20 +44,117 @@ CALL GET_ENVIRONMENT_VARIABLE('REPO_DIR',rootDir)
CALL GET_ENVIRONMENT_VARIABLE('INPUT_FILE_DIR',inputFileDir)
CALL GET_ENVIRONMENT_VARIABLE('INPUT_FILE',inputFile)
CALL GET_ENVIRONMENT_VARIABLE('OMP_NUM_THREADS',numThreads_str)
CALL GET_ENVIRONMENT_VARIABLE('NZ_STR',nz_str)
CALL GET_ENVIRONMENT_VARIABLE('NZ',nz_str)
CALL GET_ENVIRONMENT_VARIABLE('NQ',nq_str)

READ(numThreads_str,*) numThreads
READ(nz_str,*) nz
READ(nq_str,*) nq

! Print to terminal:
! ==============================================================================
WRITE(*,*) ''
WRITE(*,*) '*********************************************************************'
WRITE(*,*) 'Input file:         ', TRIM(inputFile)
WRITE(*,*) 'Input file w/ dir:  ', TRIM(inputFileDir)
WRITE(*,*) 'nz:                 ', nz
WRITE(*,*) 'Nq:                 ', nq
WRITE(*,*) 'Number of threads:  ', numThreads
WRITE(*,*) '*********************************************************************'
WRITE(*,*) ''

! Allocate memory for interpolated data:
! =============================================================================
ALLOCATE(x(nz),Bz(nz),dBz(nz))
ALLOCATE(xq(nq),Bzq(nq),dBzq(nq))

! Allocate memory to splines:
! ==============================================================================
CALL InitSpline(spline_Bz ,nz,0._8,0._8,1,0._8)
CALL InitSpline(spline_dBz,nz,0._8,0._8,1,0._8)

! Read external file data and create spline data:
! =============================================================================
! spline_Bz:
CALL ReadSpline(spline_Bz,inputFileDir)
x1 =  MINVAL(spline_Bz%x)
x2 =  MAXVAL(spline_Bz%x)
!spline_dBz:
spline_dBz = spline_Bz
CALL diff(spline_dBz%x,spline_Bz%y,nz,spline_dBz%y)

! Based on profile data. compute spline data:
! =============================================================================
CALL ComputeSpline(spline_Bz)
CALL ComputeSpline(spline_dBz)

! Random number generator:
! ============================================================================
ostart = OMP_GET_WTIME()

! Perfom interpolation:
! ===========================================================================
!$OMP PARALLEL DO SHARED(xq,Bzq,dBzq) PRIVATE(R,dummy1,dummy2) SCHEDULE(STATIC)
do i=1,nq
do j=1,10
  ! R needs to be private to avoid race conditions:
  CALL RANDOM_NUMBER(R)

  ! Select query point:
  xq(i) = x1*(1-R) + x2*R

  ! Select between levels of abstraction:
  if (.false.) then
    ! Using fitpack functions directly:
    Bzq(i)  = curv2(xq(i),spline_Bz%n,spline_Bz%x,spline_Bz%y,spline_Bz%yp,spline_Bz%sigma)
    dBzq(i) = curvd(xq(i),spline_Bz%n,spline_Bz%x,spline_Bz%y,spline_Bz%yp,spline_Bz%sigma)
  else if (.false.) then
    ! Using user-defined functions:
    Bzq(i)  = Interp1(xq(i),spline_Bz)
    dBzq(i) = diff1(xq(i),spline_Bz)
  else if (.true.) then
    ! Using Intrinsic functions which are probably vectorized:
    Bzq(i)  = COS(xq(i))
    dBzq(i) = SIN(xq(i))
  else if (.false.) then
    ! Use another spline interpolation tool:
    CALL splint(spline_Bz%x ,spline_Bz%y ,spline_Bz%yp ,nz,xq(i),Bzq(i) )
    CALL splint(spline_dBz%x,spline_dBz%y,spline_dBz%yp,nz,xq(i),dBzq(i))
  end if
end do
end do
!$OMP END PARALLEL DO

! Calculate computational time:
! ===========================================================================
oend = OMP_GET_WTIME()
WRITE(*,*) 'Compute time: ', (oend-ostart)*1000. ,' [ms]'

! Save data:
! ==========================================================================
fileName = trim('test_spline_Bzq.out')
OPEN(unit=8,file=fileName,form="unformatted",status="unknown")
WRITE(8) Bzq
CLOSE(unit=8)

fileName = trim('test_spline_dBzq.out')
OPEN(unit=8,file=fileName,form="unformatted",status="unknown")
WRITE(8) dBzq
CLOSE(unit=8)

fileName = trim('test_spline_xq.out')
OPEN(unit=8,file=fileName,form="unformatted",status="unknown")
WRITE(8) xq
CLOSE(unit=8)

fileName = trim('test_spline_Bz.out')
OPEN(unit=8,file=fileName,form="unformatted",status="unknown")
WRITE(8) spline_Bz%y
CLOSE(unit=8)

fileName = trim('test_spline_x.out')
OPEN(unit=8,file=fileName,form="unformatted",status="unknown")
WRITE(8) spline_Bz%x
CLOSE(unit=8)

END PROGRAM