Unverified Commit ef0deee4 authored by Kabelitz, Matthew Edward's avatar Kabelitz, Matthew Edward Committed by GitHub
Browse files

Native SpMV Kernel Update and nnz changes (#316)

* Native matrices now determine indices for data transfer at assemble-time

* Changed to Waitany for nonblocking

* Moved local computation to end

* Updated NNZ to be compatible with PETSc o/dnnz parameters

* Resolved CI python version mismatch

* Fixed a bug where too much memory was being allocated by Native matrix init
parent 81fdbc2c
Loading
Loading
Loading
Loading
+136 −140
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ PUBLIC :: BandedMatrixType
PUBLIC :: DistributedBandedMatrixType
PUBLIC :: DistributedBlockBandedMatrixType
PUBLIC :: SparseMatrixType
INTEGER(SIK),PARAMETER,PUBLIC :: MATVEC_SLOTS=4
INTEGER(SIK),PARAMETER,PUBLIC :: MATVEC_SLOTS=10
! PETSc implementations
#ifdef FUTILITY_HAVE_PETSC
PUBLIC :: PETScMatrixType
@@ -698,18 +698,17 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
  CHARACTER(LEN=1),INTENT(IN) :: ul
  CHARACTER(LEN=1),INTENT(IN) :: d
  INTEGER(SIK),INTENT(IN) :: incx
  INTEGER(SIK) :: rank,k
  INTEGER(SIK),ALLOCATABLE :: recvIdx(:,:), sendIdx(:,:)
  INTEGER(SIK) :: rank,k,lowIdx,highIdx
  REAL(SRK),ALLOCATABLE :: recvResult(:,:),sendResult(:,:),tmpProduct(:)
  INTEGER(SIK) :: sendRequests(MATVEC_SLOTS),sendIdxRequests(MATVEC_SLOTS)
  INTEGER(SIK) :: recvRequests(MATVEC_SLOTS),recvIdxRequests(MATVEC_SLOTS)
  INTEGER(SIK) :: lowIdx,highIdx,sendCounter,recvCounter
#ifdef HAVE_MPI
  ! Get rank
  INTEGER(SIK) :: sendRequests(MATVEC_SLOTS),recvRequests(MATVEC_SLOTS)
  INTEGER(SIK) :: ctRecv(MATVEC_SLOTS),srcRank,destRank
  INTEGER(SIK) :: i,idxTmp,cnt,ctDefault,mpierr,nproc
  INTEGER(SIK) :: i,idxTmp,ctDefault,mpierr,nproc
  CALL MPI_Comm_rank(thisMatrix%comm,rank,mpierr)
  CALL MPI_Comm_Size(thisMatrix%comm,nProc,mpierr)
  sendRequests=MPI_REQUEST_NULL
  recvRequests=MPI_REQUEST_NULL
#else
  rank=0
#endif
@@ -719,55 +718,13 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)
  ! This allows one to be used for computation and the rest for
  ! communication.
  ! Which one to write to will be determined by *Counter MOD MATVEC_SLOTS
  sendCounter=0
  recvCounter=0
  sendRequests=0
  sendIdxRequests=0
  recvRequests=0
  recvIdxRequests=0
  ! The recv/sendResult array will sometimes hold its full length,
  ! and sometimes less.
  ! The variable "cnt" will be used to determine this at
  ! each iteration
  ALLOCATE(recvIdx(thisMatrix%iOffsets(2),MATVEC_SLOTS))
  ALLOCATE(recvResult(thisMatrix%iOffsets(2),MATVEC_SLOTS))
  ALLOCATE(sendIdx(thisMatrix%iOffsets(2),MATVEC_SLOTS))
  ALLOCATE(sendResult(thisMatrix%iOffsets(2),MATVEC_SLOTS))
  ALLOCATE(tmpProduct(thisMatrix%iOffsets(rank+2)- &
      thisMatrix%iOffsets(rank+1)))

  ! First, take care of locally held data.
  SELECT TYPE(thisMatrix)
  TYPE IS(DistributedBlockBandedMatrixType)
    IF(.NOT. thisMatrix%blockMask) THEN
      DO k=1,thisMatrix%nlocalBlocks
        lowIdx=(k-1)*thisMatrix%blockSize+1
        highIdx=lowIdx-1+thisMatrix%blockSize
        CALL matvec_MatrixType(THISMATRIX=thisMatrix%blocks(k),X=x(lowIdx:highIdx), &
            Y=tmpProduct(lowIdx:highIdx),ALPHA=1.0_SRK,BETA=0.0_SRK)
      ENDDO
      IF(thisMatrix%chunks(rank+1)%isInit) THEN
        CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(rank+1),X=x, &
            y=tmpProduct,ALPHA=1.0_SRK,BETA=1.0_SRK)
      ENDIF
    ELSE
      IF(thisMatrix%chunks(rank+1)%isInit) THEN
        CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(rank+1),X=x, &
            y=tmpProduct,ALPHA=1.0_SRK,BETA=0.0_SRK)
      ELSE
  tmpProduct=0.0_SRK
      ENDIF
    ENDIF
  TYPE IS(DistributedBandedMatrixType)
    IF(thisMatrix%chunks(rank+1)%isInit) THEN
      CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(rank+1),X=x, &
          y=tmpProduct,ALPHA=1.0_SRK,BETA=0.0_SRK)
    ELSE
      tmpProduct=0.0_SRK
    ENDIF
  CLASS DEFAULT
    tmpProduct = 0.0_SRK
  ENDSELECT

#ifdef HAVE_MPI
  ! On each rank, loop over the chunks held (on diagonal moving down)
@@ -778,79 +735,82 @@ SUBROUTINE matvec_DistrBandedMatrixType(thisMatrix,x,y,t,ul,d,incx,a,b)

    ! First do local computation and send
    IF (thismatrix%chunks(destRank+1)%isInit) THEN

      ! Decide whether to send whole vector or multiple sparse
      IF (2*thisMatrix%chunks(destRank+1)%nnz/3 >= thisMatrix%chunks(destRank+1)%n) THEN
        sendCounter=sendCounter+1
        idxTmp=MOD(sendCounter,MATVEC_SLOTS)+1
      IF (thisMatrix%chunks(destRank+1)%nnz >= thisMatrix%chunks(destRank+1)%n) THEN
        ! Check if we can safely write to sendRequests
        CALL pop_send(sendResult,sendIdx,idxTmp,sendRequests,sendIdxRequests)
        CALL pop_send(sendResult,idxTmp,sendRequests)
        CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(destRank+1),X=x,&
            y=sendResult(1:ctDefault,idxTmp),ALPHA=1.0_SRK,BETA=0.0_SRK)
        CALL MPI_ISend(sendResult(1:ctDefault,idxTmp),ctDefault, &
            MPI_DOUBLE_PRECISION,destRank,0,thisMatrix%comm,sendRequests(idxTmp),mpierr)
      ELSE
        ! Send several sparse vectors
        ! Gather and send several sparse vectors
        CALL pop_send(sendResult,idxTmp,sendRequests)
        lowIdx=1
        DO k=1,SIZE(thisMatrix%chunks(destRank+1)%bandIdx)
          sendCounter=sendCounter+1
          idxTmp=MOD(sendCounter,MATVEC_SLOTS)+1
          CALL pop_send(sendResult,sendIdx,idxTmp,sendRequests,sendIdxRequests)

          ! compute destination indices
          ! perform special-case multiplication from single band here
          cnt=SIZE(thisMatrix%chunks(destRank+1)%bands(k)%jIdx)
          sendIdx(1:cnt,idxTmp)=thisMatrix%chunks(destRank+1)%bands(k)%jIdx &
              -thisMatrix%chunks(destRank+1)%bandIdx(k)
          CALL MPI_ISend(sendIdx(1:cnt,idxTmp),cnt,MPI_INTEGER,destRank,0, &
              thisMatrix%comm,sendIdxRequests(idxTmp), mpierr)
          sendResult(1:cnt,idxTmp)=thisMatrix%chunks(destRank+1)%bands(k)%elem &
          highidx=lowIdx+SIZE(thisMatrix%chunks(destRank+1)%bands(k)%jIdx)-1
          sendResult(lowIdx:highIdx,idxTmp)=thisMatrix%chunks(destRank+1)%bands(k)%elem &
              *x(thisMatrix%chunks(destRank+1)%bands(k)%jIdx)
          CALL MPI_ISend(sendResult(1:cnt,idxTmp),cnt,MPI_DOUBLE_PRECISION, &
              destRank,0,thisMatrix%comm,sendRequests(idxTmp), mpierr)
          lowIdx=highIdx+1
        ENDDO
        CALL MPI_ISend(sendResult(1:highIdx,idxTmp),highIdx,MPI_DOUBLE_PRECISION, &
            destRank,0,thisMatrix%comm,sendRequests(idxTmp), mpierr)
      ENDIF
    ENDIF
    ! We might receive data from rank MOD(rank-i,nproc)
    srcRank = MODULO(rank-i,nProc)
    IF (ALLOCATED(thisMatrix%bandSizes(srcRank+1)%p)) THEN
      IF (thisMatrix%bandSizes(srcRank+1)%p(1) < 0) THEN
    IF (thisMatrix%incIdxStt(srcRank+1) == -1) THEN
      ! We are receiving a whole vector at once
        recvCounter=recvCounter+1
        idxTmp=MOD(recvCounter,MATVEC_SLOTS)+1
      ! If we've filled up the available storage, we need
      ! to wait for communication to finish up
        CALL pop_recv(tmpProduct,recvResult,recvIdx,ctRecv,idxTmp, &
            recvRequests, recvIdxRequests)
        ctRecv(MOD(recvCounter,MATVEC_SLOTS)+1)=ctDefault
        CALL MPI_IRecv(recvResult(1:ctDefault,idxTmp), ctDefault, &
      CALL pop_recv(tmpProduct,recvResult,thisMatrix, &
          ctRecv,idxTmp,recvRequests)
      ctRecv(idxTmp)=SIZE(tmpProduct)
      CALL MPI_IRecv(recvResult(1:ctRecv(idxTmp),idxTmp), ctRecv(idxTmp), &
          MPI_DOUBLE_PRECISION,srcRank,0,thisMatrix%comm, &
          recvRequests(idxTmp),mpierr)
      ELSE
        ! We are receiving multiple sparse chunks
        DO k=1,SIZE(thisMatrix%bandSizes(srcRank+1)%p)
          recvCounter=recvCounter+1
          idxTmp=MOD(recvCounter,MATVEC_SLOTS)+1
          ! If we've filled up the available storage, we need
          ! to wait for communication to finish up
          CALL pop_recv(tmpProduct,recvResult,recvIdx,ctRecv,idxTmp, &
              recvRequests, recvIdxRequests)
          ctRecv(idxTmp)=-thisMatrix%bandSizes(srcRank+1)%p(k)
          CALL MPI_IRecv(recvIdx(1:ctRecv(idxTmp),idxTmp),-ctRecv(idxTmp), &
              MPI_INTEGER,srcRank,0,thisMatrix%comm,recvIdxRequests(idxTmp), mpierr)
          CALL MPI_IRecv(recvResult(1:ctRecv(idxTmp),idxTmp),-ctRecv(idxTmp), &
    ELSEIF (thisMatrix%incIdxStt(srcRank+1) > 0) THEN
      ! We are receiving multiple sparse chunks gathered together
      CALL pop_recv(tmpProduct,recvResult,thisMatrix, &
          ctRecv,idxTmp,recvRequests)
      ctRecv(idxTmp)= -srcRank-1
      lowIdx=thisMatrix%incIdxStt(srcRank+1)
      highIdx=thisMatrix%incIdxStp(srcRank+1)
      CALL MPI_IRecv(recvResult(1:(highIdx-lowIdx+1),idxTmp),highIdx-lowIdx+1, &
          MPI_DOUBLE_PRECISION,srcRank,0,thisMatrix%comm,recvRequests(idxTmp), mpierr)
        ENDDO
    ENDIF
  ENDDO
#endif
  ! Now, take care of locally held data.
  SELECT TYPE(thisMatrix)
  TYPE IS(DistributedBlockBandedMatrixType)
    IF(thisMatrix%chunks(rank+1)%isInit) THEN
      CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(rank+1),X=x, &
          y=tmpProduct,ALPHA=1.0_SRK,BETA=1.0_SRK)
    ENDIF
    IF(.NOT. thisMatrix%blockMask) THEN
      DO k=1,thisMatrix%nlocalBlocks
        lowIdx=(k-1)*thisMatrix%blockSize+1
        highIdx=lowIdx-1+thisMatrix%blockSize
        CALL matvec_MatrixType(THISMATRIX=thisMatrix%blocks(k),X=x(lowIdx:highIdx), &
            Y=tmpProduct(lowIdx:highIdx),ALPHA=1.0_SRK,BETA=1.0_SRK)
      ENDDO
  ! We've finished calling irecv. Wait for remaining
  ! requests to finish:
  DO idxTmp=1,MATVEC_SLOTS
    CALL pop_recv(tmpProduct, recvResult, recvIdx, ctRecv, idxTmp, &
        recvRequests, recvIdxRequests)
    ENDIF
  TYPE IS(DistributedBandedMatrixType)
    IF(thisMatrix%chunks(rank+1)%isInit) THEN
      CALL BLAS_matvec(THISMATRIX=thisMatrix%chunks(rank+1),X=x, &
          y=tmpProduct,ALPHA=1.0_SRK,BETA=1.0_SRK)
    ENDIF
  ENDSELECT
#if HAVE_MPI
  ! Wait for remaining requests to finish:
  DO k=1,MATVEC_SLOTS
    idxTmp=k
    CALL pop_recv(tmpProduct,recvResult,thisMatrix,ctRecv,idxTmp,recvRequests,.TRUE.)
  ENDDO
  DO idxTmp=1,MATVEC_SLOTS
    CALL pop_send(sendResult,sendIdx,idxTmp,sendRequests,sendIdxRequests)
  DO k=1,MATVEC_SLOTS
    idxTmp=k
    CALL pop_send(sendResult,idxTmp,sendRequests,.TRUE.)
  ENDDO
#endif

@@ -904,66 +864,102 @@ ENDSUBROUTINE matvec_DistrBandedMatrixType
!> @brief Helper routine that handles the movement of data out of the
!>        receiving buffer. This buffer has multiple "slots" that are either
!>        being worked on directly or receiving data from MPI in background.
!>        Empty slots are denoted with MPI_REQUEST_NULL
!> @param acc The accumulating vector
!> @param valBuf The recv buffer for values
!> @param thisMat The matrixtype used in multiplication
!> @param ctBuf Count of elements in each buffer slot
!> @param idxBuf The recv buffer for indices
!> @param idx The buffer slot to pop
!> @param idx The empty buffer slot that was popped
!> @param req The array of active requests for data
!> @param idxReq The array of requests for indices
!> @param f Optional bool to flush all requests
!>
SUBROUTINE pop_recv(acc,valBuf,idxBuf,ctBuf,idx,req,idxReq)
SUBROUTINE pop_recv(acc,valBuf,thisMat,ctBuf,idx,req,f)
  !> Local vector data
  REAL(SRK), INTENT(INOUT) :: acc(:)
  !> List of buffers
  REAL(SRK), INTENT(INOUT) :: valBuf(:,:)
  !> Matrix used in SpMV
  CLASS(DistributedBandedMatrixType),INTENT(INOUT) :: thisMat
  !> Array of buffer sizes
  INTEGER(SIK), INTENT(INOUT) :: ctBuf(MATVEC_SLOTS)
  INTEGER(SIK), INTENT(INOUT) :: idxBuf(:,:)
  INTEGER(SIK), INTENT(IN) :: idx
  !> The buffer slot popped
  INTEGER(SIK), INTENT(OUT) :: idx
  !> List of MPI Requests
  INTEGER(SIK), INTENT(INOUT) :: req(MATVEC_SLOTS)
  INTEGER(SIK), INTENT(INOUT) :: idxReq(MATVEC_SLOTS)
  INTEGER(SIK) :: mpierr

  IF(req(idx) == 0 .AND. idxReq(idx) == 0) RETURN
  !> Optional argument to flush all buffers
  LOGICAL(SBK),OPTIONAL,INTENT(IN) :: f
  INTEGER(SIK) :: stt,stp,mpierr,i,rank
  LOGICAL(SBK) :: force
  CALL MPI_Comm_Rank(thisMat%comm,rank,mpierr)

  force=.FALSE.
  IF (PRESENT(f)) force=f
  IF (force) THEN
    IF (req(idx) == MPI_REQUEST_NULL) RETURN
    CALL MPI_Wait(req(idx),MPI_STATUS_IGNORE,mpierr)
  ELSE
    IF (ANY(req == MPI_REQUEST_NULL)) THEN
      DO i=1,SIZE(req)
        IF (req(i) == MPI_REQUEST_NULL) THEN
          idx=i
          RETURN
        ENDIF
      ENDDO
    ELSE
      CALL MPI_WaitAny(SIZE(req),req,idx,MPI_STATUS_IGNORE,mpierr)
    ENDIF
  ENDIF

  IF(ctBuf(idx) > 0) THEN
    acc(1:ctBuf(idx))=acc(1:ctBuf(idx))+valBuf(1:ctBuf(idx),idx)
  ELSE
    CALL MPI_Wait(idxReq(idx),MPI_STATUS_IGNORE,mpierr)
    idxReq(idx)=0
    acc(idxBuf(1:-ctBuf(idx),idx))=acc(idxBuf(1:-ctBuf(idx),idx)) &
        +valBuf(1:-ctBuf(idx),idx)
    stt=thisMat%incIdxStt(-ctBuf(idx))
    stp=thisMat%incIdxStp(-ctBuf(idx))
    DO i=stt,stp
      acc(thisMat%incIdxMap(i))=acc(thisMat%incIdxMap(i))+valBuf(i-stt+1,idx)
    ENDDO
  ENDIF
ENDSUBROUTINE pop_recv

!
!-------------------------------------------------------------------------------
!> @brief Helper routine that keeps of data in the send buffer while
!> @brief Helper routine that keeps track of data in the send buffer while
!>        MPI is still working. This buffer has multiple "slots" that are
!>        either being worked on directly or contain data being sent by
!>        MPI in background.
!>        MPI in background. Empty slots are denoted with MPI_REQUEST_NULL
!> @param valBuf The send buffer for values
!> @param idxBuf The send buffer for indices
!> @param idx The buffer slot to pop
!> @param idx The buffer slot popped
!> @param req The array of requests for data
!> @param idxReq The array of requests for indices
!> @param f Optional bool to flush all requests
!>
SUBROUTINE pop_send(valBuf, idxBuf, idx, req, idxReq)
SUBROUTINE pop_send(valBuf,idx,req,f)
  !> Set of data buffers
  REAL(SRK), INTENT(INOUT) :: valBuf(:,:)
  !> The send buffer for indices
  INTEGER(SIK), INTENT(INOUT) :: idxBuf(:,:)
  !> The buffer slot to pop
  INTEGER(SIK), INTENT(IN) :: idx
  !> The array of requests for data
  !> The buffer slot popped
  INTEGER(SIK), INTENT(OUT) :: idx
  !> The array of MPI requests
  INTEGER(SIK), INTENT(INOUT) :: req(MATVEC_SLOTS)
  !> The array of requests for indices
  INTEGER(SIK), INTENT(INOUT) :: idxReq(MATVEC_SLOTS)
  INTEGER(SIK) :: mpierr

  IF(req(idx) == 0 .AND. idxReq(idx) == 0) RETURN
  !> Optional argument to flush all buffers
  LOGICAL(SBK),OPTIONAL,INTENT(IN) :: f
  INTEGER(SIK) :: mpierr,i
  LOGICAL(SBK) :: force

  force=.FALSE.
  IF (PRESENT(f)) force=f
  IF (force) THEN
    IF (req(idx) == MPI_REQUEST_NULL) RETURN
    CALL MPI_Wait(req(idx),MPI_STATUS_IGNORE,mpierr)
  IF(idxReq(idx) /= 0) THEN
    CALL MPI_Wait(idxReq(idx),MPI_STATUS_IGNORE,mpierr)
    idxReq(idx)=0
  ELSE
    IF (ANY(req == MPI_REQUEST_NULL)) THEN
      DO i=1,SIZE(req)
        IF (req(i) == MPI_REQUEST_NULL) THEN
          idx=i
          RETURN
        ENDIF
      ENDDO
    ELSE
      CALL MPI_WaitAny(SIZE(req),req,idx,MPI_STATUS_IGNORE,mpierr)
    ENDIF
  ENDIF
ENDSUBROUTINE pop_send
#endif
+88 −48
Original line number Diff line number Diff line
@@ -132,11 +132,6 @@ TYPE Band
  REAL(SRK), ALLOCATABLE :: elem(:)
ENDTYPE Band

!> @brief Integer array used for 'ragged' storage of band contents
TYPE IntPtr
  INTEGER(SIK),ALLOCATABLE :: p(:)
ENDTYPE IntPtr

!> @brief The basic banded matrix type
TYPE,EXTENDS(MatrixType) :: BandedMatrixType
  !> Map of band indices stored (-m to n)
@@ -194,13 +189,15 @@ TYPE,EXTENDS(DistributedMatrixType) :: DistributedBandedMatrixType
  !> The column of banded matrix 'chunks' stored locally
  TYPE(BandedMatrixType),ALLOCATABLE :: chunks(:)
  !> Number of nonzero elements
  INTEGER(SIK) :: nnz
  INTEGER(SLK) :: nnz
  !> Number of columns
  INTEGER(SIK) :: m
  !> Block size (smallest indivisble unit)
  INTEGER(SIK) :: blockSize
  !> Array of band sizes used to determine optimal communication
  TYPE(IntPtr), ALLOCATABLE :: bandSizes(:)
  INTEGER(SIK), ALLOCATABLE :: incIdxMap(:)
  INTEGER(SIK), ALLOCATABLE :: incIdxStt(:)
  INTEGER(SIK), ALLOCATABLE :: incIdxStp(:)
  !> Temporary containers used before (and deallocated after) assembly
  INTEGER(SIK), ALLOCATABLE :: iTmp(:),jTmp(:)
  REAL(SRK),ALLOCATABLE :: elemTmp(:)
@@ -482,7 +479,9 @@ SUBROUTINE init_DistributedBandedMatrixParam(matrix,Params)
#ifdef HAVE_MPI
  CHARACTER(LEN=*),PARAMETER :: myName='init_DistributedBandedMatrixParam'
  TYPE(ParamType) :: validParams
  INTEGER(SIK) :: n,m,nnz,commID,rank,mpierr,nproc,i,blocksize,nlocal
  INTEGER(SIK),ALLOCATABLE :: onnz(:),dnnz(:)
  INTEGER(SIK) :: n,m,commID,rank,mpierr,nproc,i,blocksize,nlocal,nnz_int
  INTEGER(SLK) :: nnz

  !Check to set up required and optional param lists.
  IF(.NOT.MatrixType_Paramsflag) CALL MatrixTypes_Declare_ValidParams()
@@ -496,10 +495,22 @@ SUBROUTINE init_DistributedBandedMatrixParam(matrix,Params)
  CALL validParams%get('MatrixType->n',n)
  CALL validParams%get('MatrixType->m',m)
  CALL validParams%get('MatrixType->MPI_Comm_ID',commID)
  CALL validParams%get('MatrixType->nnz',nnz)
  CALL validParams%get('MatrixType->nnz',nnz_int)
  CALL validParams%get('MatrixType->blockSize',blockSize)
  CALL validParams%get('MatrixType->nlocal',nlocal)
  CALL validParams%clear()
  nnz=INT(nnz_int,KIND=8)
  IF(nnz_int <= 1) THEN
    IF (validParams%has("MatrixType->dnnz") .AND. validParams%has("MatrixType->onnz")) THEN
      nnz=0_SLK
      CALL validParams%get("MatrixType->onnz",onnz)
      CALL validParams%get("MatrixType->dnnz",dnnz)
      DO i=1,SIZE(onnz)
        nnz=nnz+INT(onnz(i),KIND=SLK)
        nnz=nnz+INT(dnnz(i),KIND=SLK)
      ENDDO
      CALL MPI_AllReduce(nnz,MPI_IN_PLACE,1,MPI_LONG,MPI_SUM,commID,mpierr)
    ENDIF
  ENDIF

  IF(.NOT. matrix%isInit) THEN
    IF(n <= 1) THEN
@@ -575,12 +586,12 @@ SUBROUTINE init_DistributedBandedMatrixParam(matrix,Params)
      matrix%nnz=nnz
      matrix%blockSize = blockSize
      matrix%comm=commID
      ALLOCATE(matrix%bandSizes(nProc))
    ENDIF
  ELSE
    CALL eMatrixType%raiseError('Incorrect call to '// &
        modName//'::'//myName//' - MatrixType already initialized')
  ENDIF
  CALL validParams%clear()
#endif
ENDSUBROUTINE init_DistributedBandedMatrixParam

@@ -690,10 +701,10 @@ SUBROUTINE assemble_BandedMatrixType(thisMatrix)
  ALLOCATE(idxOrig(thisMatrix%counter))
  IF (thisMatrix%isAssembled) RETURN
  DO i=1,thisMatrix%counter
    iLong=INT(thisMatrix%iTmp(i),kind=SLK)
    jLong=INT(thisMatrix%jTmp(i),kind=SLK)
    nLong=INT(thisMatrix%n,kind=SLK)
    mLong=INT(thisMatrix%m,kind=SLK)
    iLong=INT(thisMatrix%iTmp(i),KIND=SLK)
    jLong=INT(thisMatrix%jTmp(i),KIND=SLK)
    nLong=INT(thisMatrix%n,KIND=SLK)
    mLong=INT(thisMatrix%m,KIND=SLK)
    idxOrig(i)=i
    ! The diagonal rank counts upwards as one moves southeast in the matrix,
    ! starting at a large negative number in the bottom left, and reaching
@@ -764,14 +775,14 @@ SUBROUTINE assemble_DistributedBandedMatrixType(thisMatrix,ierr)
  INTEGER(SIK),INTENT(OUT),OPTIONAL :: ierr
#ifdef HAVE_MPI
  TYPE(ParamType) :: bandedPList
  INTEGER(SIK) :: mpierr, rank, nproc, i, j, nBandLocal
  INTEGER(SIK),ALLOCATABLE :: nnzPerChunk(:),nband(:),bandSizeTmp(:)
  INTEGER(SIK) :: mpierr,rank,nproc,i,j,nTransmit,stt,stp
  INTEGER(SIK),ALLOCATABLE :: nnzPerChunk(:),nRecv(:),idxMapTmp(:)

  REQUIRE(thisMatrix%isInit)
  CALL MPI_Comm_rank(thisMatrix%comm,rank,mpierr)
  CALL MPI_Comm_size(thisMatrix%comm,nproc,mpierr)
  ALLOCATE(nnzPerChunk(nProc))
  ALLOCATE(nBand(nProc))
  ALLOCATE(nRecv(nProc))
  nnzPerChunk=0
  DO i=1,thisMatrix%nLocal
    DO j=1,nproc
@@ -804,45 +815,77 @@ SUBROUTINE assemble_DistributedBandedMatrixType(thisMatrix,ierr)
      CALL thisMatrix%chunks(i)%assemble()
      CALL bandedPList%clear()
    ENDIF
    ! Set nbandlocal (to be gathered into nband

    ! Decide the sending method to use. 'Full' chunks will send their whole
    ! contents while 'sparse' chunks will send band products.
    ! Set nTransmit to the number of REAL's to be sent from that processor to
    ! this one.
    !  >0: This many
    !   0: None (chunk empty)
    !  -1: All
    nTransmit=0
    IF(thisMatrix%chunks(i)%isInit) THEN
      IF(2*thisMatrix%chunks(i)%nnz/3 < thisMatrix%chunks(i)%n) THEN
        nBandLocal=SIZE(thisMatrix%chunks(i)%bandIdx)
      IF(thisMatrix%chunks(i)%nnz < thisMatrix%chunks(i)%n) THEN
        DO j=1,SIZE(thisMatrix%chunks(i)%bandIdx)
          nTransmit=nTransmit+SIZE(thisMatrix%chunks(i)%bands(j)%jIdx)
        ENDDO
      ELSE
        nBandLocal=-1
        nTransmit=-1
      ENDIF
    ELSE
      nBandLocal=0
    ENDIF
    ! This info must be disseminated through all procs

    ! Gather to rank i-1 into nBand(:)
    CALL MPI_Gather(nBandLocal,1,MPI_INTEGER,nband(1),1,MPI_INTEGER,i-1, &
    ! Gather to rank i-1 into nRecv(:)
    CALL MPI_Gather(nTransmit,1,MPI_INTEGER,nRecv,1,MPI_INTEGER,i-1, &
        thisMatrix%comm,mpierr)
    ! Have rank i-1 allocate
    IF(rank == i-1) THEN
      nRecv(i)=0
      nTransmit=0
      DO j=1,SIZE(nRecv)
        IF (nRecv(j) > 0) THEN
          nTransmit=nTransmit+nRecv(j)
        ENDIF
      ENDDO
      ALLOCATE(thisMatrix%incIdxMap(nTransmit))
      ALLOCATE(thisMatrix%incIdxStt(nProc))
      ALLOCATE(thisMatrix%incIdxStp(nProc))
      thisMatrix%incIdxStt=0
      thisMatrix%incIdxStp=0
      ! Use nTransmit as counter
      nTransmit=1
      DO j=1,nProc
        IF(j /= i) THEN
          IF(nBand(j) > 0) THEN
            ALLOCATE(thisMatrix%bandSizes(j)%p(nBand(j)))
            CALL MPI_Recv(thisMatrix%bandSizes(j)%p(1),nBand(j),MPI_INTEGER, &
                j-1, 0, thisMatrix%comm,MPI_STATUS_IGNORE, mpierr)
        IF(j == i) THEN
          thisMatrix%incIdxStt(j)=0
          thisMatrix%incIdxStp(j)=0
        ELSE
            IF(nBand(j) /= 0) THEN
              ALLOCATE(thisMatrix%bandSizes(j)%p(1))
              thisMatrix%bandSizes(j)%p(1)=-1
            ENDIF
          IF(nRecv(j) > 0) THEN
            thisMatrix%incIdxStt(j)=nTransmit
            thisMatrix%incIdxStp(j)=nTransmit+nRecv(j)-1
            ! We receive data (and don't want to bother sending idx's at mvmult
            ! time)
            CALL MPI_Recv(thisMatrix%incIdxMap(nTransmit:nTransmit+nRecv(j)-1), &
                nRecv(j),MPI_INTEGER,j-1,0,thisMatrix%comm,MPI_STATUS_IGNORE, mpierr)
            nTransmit=nTransmit+nRecv(j)
          ELSE IF (nRecv(j) == -1) THEN
            thisMatrix%incIdxStt(j)=-1
            thisMatrix%incIdxStp(j)=-1
          ENDIF
        ENDIF
      ENDDO
    ELSE
      IF(nBandLocal > 0) THEN
        ALLOCATE(bandSizeTmp(nBandLocal))
        DO j=1,nBandLocal
          bandSizeTmp(j)=SIZE(thisMatrix%chunks(i)%bands(j)%elem)
      ! Build the index map
      IF(nTransmit > 0) THEN
        ALLOCATE(idxMapTmp(nTransmit))
        stt=1
        DO j=1,SIZE(thisMatrix%chunks(i)%bandIdx)
          stp=stt+SIZE(thisMatrix%chunks(i)%bands(j)%jIdx)-1
          idxMapTmp(stt:stp)=thisMatrix%chunks(i)%bands(j)%jIdx-thisMatrix%chunks(i)%bandIdx(j)
          stt=stp+1
        ENDDO
        CALL MPI_Send(bandSizeTmp(:),nBandLocal,MPI_INTEGER,i-1,0, &
        CALL MPI_Send(idxMapTmp,nTransmit,MPI_INTEGER,i-1,0, &
            thisMatrix%comm,mpierr)
        DEALLOCATE(bandSizeTmp)
        DEALLOCATE(idxMapTmp)
      ENDIF
    ENDIF
  ENDDO
@@ -1039,12 +1082,9 @@ SUBROUTINE clear_DistributedBandedMatrixType(matrix)
    ENDDO
    DEALLOCATE(matrix%chunks)
  ENDIF
  IF(ALLOCATED(matrix%bandSizes)) THEN
    DO i=1,SIZE(matrix%bandSizes)
      IF(ALLOCATED(matrix%bandSizes(i)%p)) DEALLOCATE(matrix%bandSizes(i)%p)
    ENDDO
    DEALLOCATE(matrix%bandSizes)
  ENDIF
  IF(ALLOCATED(matrix%incIdxMap)) DEALLOCATE(matrix%incIdxMap)
  IF(ALLOCATED(matrix%incIdxStt)) DEALLOCATE(matrix%incIdxStt)
  IF(ALLOCATED(matrix%incIdxStp)) DEALLOCATE(matrix%incIdxStp)
  IF(ALLOCATED(matrix%iOffsets)) DEALLOCATE(matrix%iOffsets)
  IF(ALLOCATED(matrix%jOffsets)) DEALLOCATE(matrix%jOffsets)
  IF(ALLOCATED(matrix%iTmp)) DEALLOCATE(matrix%iTmp)
+1 −1
Original line number Diff line number Diff line
@@ -758,7 +758,7 @@ SUBROUTINE init_DistributedSOR_PreCondtype(thisPC,A,params)

  thisPC%A => A
  !get omega from the parameter list
  thisPC%omega = 0.0_SRK
  thisPC%omega = 1.0_SRK
  IF (params%has('PreCondType->omega')) &
    CALL params%get('PreCondType->omega',thisPC%omega)

+6 −0
Original line number Diff line number Diff line
@@ -420,6 +420,9 @@ SUBROUTINE VectorResembleAllocScal(dest, source, inparams)
    ENDIF
#ifdef HAVE_MPI
  TYPE IS(NativeDistributedVectorType)
    IF(.NOT. params%has("VectorType->nlocal")) THEN
      CALL params%add("VectorType->nlocal",SIZE(source%b))
    ENDIF
    IF(.NOT. params%has("VectorType->chunkSize")) THEN
      CALL params%add("VectorType->chunkSize",source%chunkSize)
    ENDIF
@@ -497,6 +500,9 @@ SUBROUTINE VectorResembleAllocArray(dest, source, nvec, inparams)
    ENDIF
#ifdef HAVE_MPI
  TYPE IS(NativeDistributedVectorType)
    IF(.NOT. params%has("VectorType->nlocal")) THEN
      CALL params%add("VectorType->nlocal",SIZE(source%b))
    ENDIF
    IF(.NOT. params%has("VectorType->chunkSize")) THEN
      CALL params%add("VectorType->chunkSize",source%chunkSize)
    ENDIF
+34 −34

File changed.

Preview size limit exceeded, changes collapsed.