Commit 9a41c81f authored by Kabelitz, Matthew Edward, committed by Sooyoung Choi

Changed to MPI_Waitany for nonblocking
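
This replaces the round-robin slot selection in matvec_DistrBandedMatrixType
(sendCounter/recvCounter advanced MOD MATVEC_SLOTS) with selection based on
MPI_Waitany: the request arrays are initialized to MPI_REQUEST_NULL instead of
0, pop_send/pop_recv hand a free slot back through idx (now INTENT(INOUT)),
and a new optional force flag makes the final drain loops wait on each
specific slot. The payoff is that a send or receive can reuse whichever slot
completes first instead of blocking on a fixed slot in round-robin order.

A minimal sketch of the pattern, independent of the routines changed below
(get_free_slot and its argument names are illustrative only):

  SUBROUTINE get_free_slot(req,idx)
    USE mpi
    IMPLICIT NONE
    ! req: nonblocking request handles, initialized to MPI_REQUEST_NULL
    INTEGER,INTENT(INOUT) :: req(:)
    ! idx: on return, a slot whose buffer is safe to overwrite
    INTEGER,INTENT(OUT) :: idx
    INTEGER :: i,mpierr
    ! Prefer a slot with no request in flight
    DO i=1,SIZE(req)
      IF (req(i) == MPI_REQUEST_NULL) THEN
        idx=i
        RETURN
      ENDIF
    ENDDO
    ! Otherwise block until ANY request completes; MPI_Waitany returns the
    ! completed slot's index (1-based in Fortran) and resets that handle to
    ! MPI_REQUEST_NULL
    CALL MPI_Waitany(SIZE(req),req,idx,MPI_STATUS_IGNORE,mpierr)
  ENDSUBROUTINE get_free_slot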

parent 2eed2386
+62 −34
@@ -701,7 +701,7 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
   INTEGER(SIK) :: rank,k
   REAL(SRK),ALLOCATABLE :: recvResult(:,:),sendResult(:,:),tmpProduct(:)
   INTEGER(SIK) :: sendRequests(MATVEC_SLOTS),recvRequests(MATVEC_SLOTS)
-  INTEGER(SIK) :: lowIdx,highIdx,sendCounter,recvCounter
+  INTEGER(SIK) :: lowIdx,highIdx
 #ifdef HAVE_MPI
   ! Get rank
   INTEGER(SIK) :: ctRecv(MATVEC_SLOTS),srcRank,destRank
@@ -717,10 +717,8 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
   ! This allows one to be used for computation and the rest for
   ! communication.
   ! Which one to write to will be determined by *Counter MOD MATVEC_SLOTS
-  sendCounter=0
-  recvCounter=0
-  sendRequests=0
-  recvRequests=0
+  sendRequests=MPI_REQUEST_NULL
+  recvRequests=MPI_REQUEST_NULL
   ! The recv/sendResult array will sometimes hold it's full length,
   ! and sometimes less.
   ALLOCATE(recvResult(thisMatrix%iOffsets(2),MATVEC_SLOTS))
@@ -772,9 +770,7 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
     IF (thismatrix%chunks(destRank+1)%isInit) THEN

       ! Decide whether to send whole vector or multiple sparse
-      IF (2*thisMatrix%chunks(destRank+1)%nnz/3 >= thisMatrix%chunks(destRank+1)%n) THEN
-        sendCounter=sendCounter+1
-        idxTmp=MOD(sendCounter,MATVEC_SLOTS)+1
+      IF (thisMatrix%chunks(destRank+1)%nnz >= thisMatrix%chunks(destRank+1)%n) THEN
         ! Check if we can safely write to sendRequests
         CALL pop_send(sendResult,idxTmp,sendRequests)
         CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(destRank+1),X=x,&
@@ -783,8 +779,6 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
             MPI_DOUBLE_PRECISION,destRank,0,thisMatrix%comm,sendRequests(idxTmp),mpierr)
       ELSE
         ! Gather and send several sparse vectors
-        sendCounter=sendCounter+1
-        idxTmp=MOD(sendCounter,MATVEC_SLOTS)+1
         CALL pop_send(sendResult,idxTmp,sendRequests)
         lowIdx=1
         DO k=1,SIZE(thisMatrix%chunks(destRank+1)%bandIdx)
@@ -801,23 +795,19 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
     srcRank = MODULO(rank-i,nProc)
     IF (thisMatrix%incIdxStt(srcRank+1) == -1) THEN
       ! We are receiving a whole vector at once
-      recvCounter=recvCounter+1
-      idxTmp=MOD(recvCounter,MATVEC_SLOTS)+1
       ! If we've filled up the available storage, we need
       ! to wait for communication to finish up
       CALL pop_recv(tmpProduct,recvResult,thisMatrix, &
           ctRecv,idxTmp,recvRequests)
-      ctRecv(MOD(recvCounter,MATVEC_SLOTS)+1)=ctDefault
-      CALL MPI_IRecv(recvResult(1:ctDefault,idxTmp), ctDefault, &
+      ctRecv(idxTmp)=SIZE(tmpProduct)
+      CALL MPI_IRecv(recvResult(1:ctRecv(idxTmp),idxTmp), ctRecv(idxTmp), &
           MPI_DOUBLE_PRECISION,srcRank,0,thisMatrix%comm, &
           recvRequests(idxTmp),mpierr)
     ELSEIF (thisMatrix%incIdxStt(srcRank+1) > 0) THEN
       ! We are receiving multiple sparse chunks gathered together
-      recvCounter=recvCounter+1
-      idxTmp=MOD(recvCounter,MATVEC_SLOTS)+1
       CALL pop_recv(tmpProduct,recvResult,thisMatrix, &
           ctRecv,idxTmp,recvRequests)
-      ctRecv(idxTmp)= -srcRank
+      ctRecv(idxTmp)= -srcRank-1
       lowIdx=thisMatrix%incIdxStt(srcRank+1)
       highIdx=thisMatrix%incIdxStp(srcRank+1)
       CALL MPI_IRecv(recvResult(1:(highIdx-lowIdx+1),idxTmp),highIdx-lowIdx+1, &
@@ -826,11 +816,13 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
   ENDDO
   ! We've finished calling irecv. Wait for remaining
   ! requests to finish:
-  DO idxTmp=1,MATVEC_SLOTS
-    CALL pop_recv(tmpProduct,recvResult,thisMatrix,ctRecv,idxTmp,recvRequests)
+  DO k=1,MATVEC_SLOTS
+    idxTmp=k
+    CALL pop_recv(tmpProduct,recvResult,thisMatrix,ctRecv,idxTmp,recvRequests,.TRUE.)
   ENDDO
-  DO idxTmp=1,MATVEC_SLOTS
-    CALL pop_send(sendResult,idxTmp,sendRequests)
+  DO k=1,MATVEC_SLOTS
+    idxTmp=k
+    CALL pop_send(sendResult,idxTmp,sendRequests,.TRUE.)
   ENDDO
 #endif

@@ -892,25 +884,43 @@ ENDSUBROUTINE matvec_DistrBandedMatrixType
 !> @param req The array of active requests for data
 !> @param idxReq The array of requests for indices
 !>
-SUBROUTINE pop_recv(acc,valBuf,thisMat,ctBuf,idx,req)
+SUBROUTINE pop_recv(acc,valBuf,thisMat,ctBuf,idx,req,f)
   REAL(SRK), INTENT(INOUT) :: acc(:)
   REAL(SRK), INTENT(INOUT) :: valBuf(:,:)
   CLASS(DistributedBandedMatrixType),INTENT(INOUT) :: thisMat
   INTEGER(SIK), INTENT(INOUT) :: ctBuf(MATVEC_SLOTS)
-  INTEGER(SIK), INTENT(IN) :: idx
+  INTEGER(SIK), INTENT(INOUT) :: idx
   INTEGER(SIK), INTENT(INOUT) :: req(MATVEC_SLOTS)
-  INTEGER(SIK) :: stt,stp,mpierr,i
-
-  IF(req(idx) == 0) RETURN
+  LOGICAL(SBK),OPTIONAL,INTENT(IN) :: f
+  INTEGER(SIK) :: stt,stp,mpierr,i,rank
+  LOGICAL(SBK) :: force
+  CALL MPI_Comm_Rank(thisMat%comm,rank,mpierr)
+
+  force=.FALSE.
+  IF (PRESENT(f)) force=f
+  IF (force) THEN
+    IF (req(idx) == MPI_REQUEST_NULL) RETURN
     CALL MPI_Wait(req(idx),MPI_STATUS_IGNORE,mpierr)
+  ELSE
+    IF (ANY(req == MPI_REQUEST_NULL)) THEN
+      DO i=1,SIZE(req)
+        IF (req(i) == MPI_REQUEST_NULL) THEN
+          idx=i
+          RETURN
+        ENDIF
+      ENDDO
+    ELSE
+      CALL MPI_WaitAny(SIZE(req),req,idx,MPI_STATUS_IGNORE,mpierr)
+    ENDIF
+  ENDIF

   IF(ctBuf(idx) > 0) THEN
     acc(1:ctBuf(idx))=acc(1:ctBuf(idx))+valBuf(1:ctBuf(idx),idx)
   ELSE
-    stt=thisMat%incIdxStt(-ctBuf(idx)+1)
-    stp=thisMat%incIdxStp(-ctBuf(idx)+1)
+    stt=thisMat%incIdxStt(-ctBuf(idx))
+    stp=thisMat%incIdxStp(-ctBuf(idx))
     DO i=stt,stp
-      acc(thisMat%incIdxMap(i))=acc(thisMat%incIdxMap(i))+valBuf(i,idx)
+      acc(thisMat%incIdxMap(i))=acc(thisMat%incIdxMap(i))+valBuf(i-stt+1,idx)
     ENDDO
   ENDIF
 ENDSUBROUTINE pop_recv
@@ -927,16 +937,34 @@ ENDSUBROUTINE pop_recv
 !> @param req The array of requests for data
 !> @param idxReq The array of requests for indices
 !>
-SUBROUTINE pop_send(valBuf, idx, req)
+SUBROUTINE pop_send(valBuf,idx,req,f)
   REAL(SRK), INTENT(INOUT) :: valBuf(:,:)
   !> The buffer slot to pop
-  INTEGER(SIK), INTENT(IN) :: idx
+  INTEGER(SIK), INTENT(INOUT) :: idx
   !> The array of requests for data
   INTEGER(SIK), INTENT(INOUT) :: req(MATVEC_SLOTS)
-  INTEGER(SIK) :: mpierr
+  LOGICAL(SBK),OPTIONAL,INTENT(IN) :: f
+  INTEGER(SIK) :: mpierr,i
+  LOGICAL(SBK) :: force

-  IF(req(idx) == 0) RETURN
+  force=.FALSE.
+  IF (PRESENT(f)) force=f
+  !IF(req(idx) == MPI_REQUEST_NULL) RETURN
+  IF (force) THEN
+    IF (req(idx) == MPI_REQUEST_NULL) RETURN
     CALL MPI_Wait(req(idx),MPI_STATUS_IGNORE,mpierr)
+  ELSE
+    IF (ANY(req == MPI_REQUEST_NULL)) THEN
+      DO i=1,SIZE(req)
+        IF (req(i) == MPI_REQUEST_NULL) THEN
+          idx=i
+          RETURN
+        ENDIF
+      ENDDO
+    ELSE
+      CALL MPI_WaitAny(SIZE(req),req,idx,MPI_STATUS_IGNORE,mpierr)
+    ENDIF
+  ENDIF
 ENDSUBROUTINE pop_send
 #endif
 !
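
Note on the new pop_send/pop_recv contract above: idx changes from INTENT(IN)
to INTENT(INOUT). Without the force flag, the routines no longer wait on the
caller's chosen slot; they return a free one (the first MPI_REQUEST_NULL
entry if any, otherwise whichever request MPI_WaitAny retires first). With
force=.TRUE. they wait on the specific slot idx, which is what the drain
loops at the end of the matvec use. The accumulate step in pop_recv also
fixes two indexing problems: ctBuf now stores -srcRank-1, so -ctBuf(idx)
indexes incIdxStt/incIdxStp directly, and the gathered sparse buffer is read
at valBuf(i-stt+1,idx), matching the MPI_IRecv that fills recvResult starting
at element 1.

The caller-side pattern, condensed from the diff (the packing of sendResult
is elided and n stands for the send count; both are illustrative here):

  CALL pop_send(sendResult,idxTmp,sendRequests)  ! idxTmp returns a free slot
  ! ... pack sendResult(1:n,idxTmp) ...
  CALL MPI_ISend(sendResult(1:n,idxTmp),n,MPI_DOUBLE_PRECISION, &
      destRank,0,thisMatrix%comm,sendRequests(idxTmp),mpierr)
  ! ...
  ! At shutdown, wait on every slot in turn:
  DO k=1,MATVEC_SLOTS
    idxTmp=k
    CALL pop_send(sendResult,idxTmp,sendRequests,.TRUE.)
  ENDDO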
+8 −7
@@ -810,7 +810,7 @@ SUBROUTINE assemble_DistributedBandedMatrixType(thisMatrix,ierr)
     !  -1: All
     nTransmit=0
     IF(thisMatrix%chunks(i)%isInit) THEN
-      IF(2*thisMatrix%chunks(i)%nnz/3 < thisMatrix%chunks(i)%n) THEN
+      IF(thisMatrix%chunks(i)%nnz < thisMatrix%chunks(i)%n) THEN
         DO j=1,SIZE(thisMatrix%chunks(i)%bandIdx)
           nTransmit=nTransmit+SIZE(thisMatrix%chunks(i)%bands(j)%jIdx)
         ENDDO
@@ -825,6 +825,7 @@ SUBROUTINE assemble_DistributedBandedMatrixType(thisMatrix,ierr)
         thisMatrix%comm,mpierr)
     ! Have rank i-1 allocate
     IF(rank == i-1) THEN
+      nRecv(i)=0
       nTransmit=0
       DO j=1,SIZE(nRecv)
         IF (nRecv(j) > 0) THEN
@@ -836,20 +837,20 @@ SUBROUTINE assemble_DistributedBandedMatrixType(thisMatrix,ierr)
       ALLOCATE(thisMatrix%incIdxStp(nProc))
       thisMatrix%incIdxStt=0
       thisMatrix%incIdxStp=0
-      DO j=1,nProc
+      ! Use nTransmit as counter
+      nTransmit=1
+      DO j=1,nProc
         IF(j == i) THEN
-          thisMatrix%incIdxStt(j)=-1
-          thisMatrix%incIdxStp(j)=-1
+          thisMatrix%incIdxStt(j)=0
+          thisMatrix%incIdxStp(j)=0
         ELSE
           IF(nRecv(j) > 0) THEN
             thisMatrix%incIdxStt(j)=nTransmit
             thisMatrix%incIdxStp(j)=nTransmit+nRecv(j)-1
             ! We receive data (and don't want to bother sending idx's at mvmult
             ! time)
-            CALL MPI_Recv(thisMatrix%incIdxMap(nTransmit),nRecv(j),MPI_INTEGER, &
-                j-1,0,thisMatrix%comm,MPI_STATUS_IGNORE, mpierr)
+            CALL MPI_Recv(thisMatrix%incIdxMap(nTransmit:nTransmit+nRecv(j)-1), &
+                nRecv(j),MPI_INTEGER,j-1,0,thisMatrix%comm,MPI_STATUS_IGNORE, mpierr)
             nTransmit=nTransmit+nRecv(j)
           ELSE IF (nRecv(j) == -1) THEN
             thisMatrix%incIdxStt(j)=-1
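
The assemble-side changes keep the bookkeeping consistent with the new matvec
paths: the dense-versus-sparse threshold becomes nnz >= n (matching the
send-side test above), nRecv(i)=0 ensures a rank never posts a receive from
itself, the local rank's incIdxStt/incIdxStp become 0 (freeing -1 to mean
exclusively "receive the whole vector"), and nTransmit is reused as a 1-based
cursor into incIdxMap, with an explicit array section passed to MPI_Recv so
the buffer matches the count.

A worked example with hypothetical sizes: for nProc=3 on rank 1 (i=2),
suppose nRecv=(4,0,3). The loop yields incIdxStt=(1,0,5) and
incIdxStp=(4,0,7); incIdxMap(1:4) is filled by the MPI_Recv from rank 0 and
incIdxMap(5:7) by the one from rank 2. At matvec time,
incIdxStt(srcRank+1) > 0 selects the gathered-sparse receive, 0 skips the
local chunk, and -1 selects the whole-vector receive.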