Loading src/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,7 @@ OBJS=\ setup_matrix.o \ setup_vbatch.o \ unsetup_vbatch.o \ setup_nC.o \ setup_sparse_batch.o \ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ Loading src/estimate_work.c +86 −4 Original line number Diff line number Diff line #include "test_vbatch.h" void get_total_memory( int noperator, int npatches, FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], int right_patch_size_[], size_t *ptotal_memory_in_nbytes ) { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; size_t nbytes = 0; nbytes = sizeof(IntegerType)*npatches*npatches; IntegerType *nC_ = (IntegerType *) malloc(nbytes); assert( nC_ != 0 ); nbytes = sizeof(IntegerType)*npatches*npatches*noperator; IntegerType *gnnz_A_ = (IntegerType *) malloc( nbytes ); assert( gnnz_A_ != 0 ); nbytes = sizeof(IntegerType)*npatches*npatches*noperator; IntegerType *gnnz_B_ = (IntegerType *) malloc( nbytes ); assert( gnnz_B_ != 0 ); setup_nC( noperator, npatches, Amatrix_, ld_Amatrix_, Bmatrix_, ld_Bmatrix_, left_patch_size_, right_patch_size_, nC_, gnnz_A_, gnnz_B_ ); estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX, &gmemXY ); size_t total_memory_in_bytes = sizeof(FpType) * (gmemA + gmemB + gmemBX + gmemXY); free( nC_ ); free( gnnz_A_ ); free( gnnz_B_ ); *ptotal_memory_in_nbytes = total_memory_in_bytes; } void estimate_work( int npatches, int left_patch_size_[], int right_patch_size_[], Loading @@ -7,7 +71,8 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX ) double *pgmemBX, double *pgmemXY ) #define nC(ipatch,jpatch) nC_[ ((ipatch)-1) + ((jpatch)-1)*npatches ] #define left_patch_size(ipatch) left_patch_size_[(ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[(ipatch)-1] Loading @@ -17,6 +82,7 @@ void estimate_work( int npatches, estimate total work ------------------- */ const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; assert( ptotal_gflops != NULL ); Loading @@ -27,12 +93,24 @@ void estimate_work( int npatches, double gmemA = 0.0; double gmemB = 0.0; double gmemBX = 0.0; double gmemXY = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(ipatch=1; ipatch <= npatches; ipatch++) { gmemXY += left_patch_size(ipatch) * right_patch_size(ipatch); }; /* * ----------------------- * count both X, Y vectors * ----------------------- */ gmemXY *= 2; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { Loading @@ -56,9 +134,12 @@ void estimate_work( int npatches, int ncolB = right_patch_size(jpatch); int ncolX = ncolA; gmemA += nop * nrowA * ncolA; gmemB += nop * nrowB * ncolB; gmemBX += nop * nrowB * ncolX; int ldA = ialign * (( nrowA + (ialign-1))/ialign ); int ldB = ialign * (( nrowB + (ialign-1))/ialign ); int ldBX = ldB; gmemA += nop * ldA * ncolA; gmemB += nop * ldB * ncolB; gmemBX += nop * ldBX * ncolX; cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); Loading @@ -74,5 +155,6 @@ void estimate_work( int npatches, *pgmemA = gmemA; *pgmemB = gmemB; *pgmemBX = gmemBX; *pgmemXY = gmemXY; } src/estimate_work.h +12 −1 Original line number Diff line number Diff line Loading @@ -13,8 +13,19 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX double *pgmemBX, double *pgmemXY ); extern void get_total_memory( int noperator, int npatches, FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], int right_patch_size_[], size_t *ptotal_memory_in_nbytes ); #ifdef __cplusplus } #endif Loading src/setup_nC.c 0 → 100644 +147 −0 Original line number Diff line number Diff line #include <stdlib.h> #include <assert.h> #include <inttypes.h> #include <math.h> #include "dmrg_vbatch.h" #include "setup_nC.h" #ifndef MIN #define MIN(x,y) (((x) < (y))?(x):(y)) #endif #define index3(ipatch,jpatch,ioperator) \ ( ( ((ipatch)-1) + (((jpatch)-1)*(npatches)) ) + \ ( ((ioperator)-1)*((npatches)*(npatches)) ) ) #define gnnz_A(ipatch,jpatch,ioperator) gnnz_A_[index3(ipatch,jpatch,ioperator)] #define gnnz_B(ipatch,jpatch,ioperator) gnnz_B_[ index3(ipatch,jpatch,ioperator)] #define Amatrix(ipatch,jpatch,ioperator) Amatrix_[ index3(ipatch,jpatch,ioperator) ] #define Bmatrix(ipatch,jpatch,ioperator) Bmatrix_[ index3(ipatch,jpatch,ioperator) ] #define ld_Amatrix(ipatch,jpatch,ioperator) ld_Amatrix_[ index3(ipatch,jpatch,ioperator) ] #define ld_Bmatrix(ipatch,jpatch,ioperator) ld_Bmatrix_[ index3(ipatch,jpatch,ioperator) ] #define nC(i,j) nC_[ indx2f(i,j,npatches)] #define left_patch_size(ipatch) left_patch_size_[ (ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[ (ipatch)-1] void setup_nC( int noperator, int npatches, /* * --------- * intent(in) * --------- */ FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], /* sizes of left patches (INPUT) */ int right_patch_size_[], /* sizes of right patches (INPUT) */ /* * -------------------------------- * intent(out) but assume storage already allocated * -------------------------------- */ IntegerType nC_[], IntegerType gnnz_A_[], IntegerType gnnz_B_[] ) { int ipatch = 0; int jpatch = 0; int ioperator = 0; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { nC(ipatch,jpatch) = 0; }; }; for(ioperator=1; ioperator <= noperator; ioperator++) { for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { #define Amat(i,j) Amat_[indx2f(i,j,ld_Amat)] #define Bmat(i,j) Bmat_[indx2f(i,j,ld_Bmat)] FpType *Amat_ = Amatrix(ipatch,jpatch,ioperator); FpType *Bmat_ = Bmatrix(ipatch,jpatch,ioperator); int ld_Amat = ld_Amatrix(ipatch,jpatch,ioperator); int ld_Bmat = ld_Bmatrix(ipatch,jpatch,ioperator); int nrowA = left_patch_size(ipatch); int ncolA = left_patch_size(jpatch); int nrowB = right_patch_size(ipatch); int ncolB = right_patch_size(jpatch); int nnz_Amat = 0; int nnz_Bmat = 0; /* -------------------------------------------------------------- Note: treat a NULL pointer as pointer to a matrix of all zeros -------------------------------------------------------------- */ if (Amat_ == NULL) { nnz_Amat = 0; } else { int ia = 0; int ja = 0; for(ja=1; ja <= ncolA; ja++) { for(ia=1; ia <= nrowA; ia++) { nnz_Amat += ((Amat(ia,ja) == 0)? 0 : 1); }; }; }; if (Bmat_ == NULL) { nnz_Bmat = 0; } else { int ib = 0; int jb = 0; for(jb=1; jb <= ncolB; jb++) { for(ib=1; ib <= nrowB; ib++) { nnz_Bmat += ((Bmat(ib,jb) == 0) ? 0 : 1); }; }; }; gnnz_A(ipatch,jpatch,ioperator) = nnz_Amat; gnnz_B(ipatch,jpatch,ioperator) = nnz_Bmat; int is_zero_Amat = (nnz_Amat == 0); int is_zero_Bmat = (nnz_Bmat == 0); if (is_zero_Amat || is_zero_Bmat) { /* --------------------------------------------------------- ignore this operator since kron(Amat,Bmat) is zero matrix --------------------------------------------------------- */ } else { /* ------------------------------------------ pair of non-zero Amat(k), Bmat(k) matrices ------------------------------------------ */ nC(ipatch,jpatch) = nC(ipatch,jpatch) + 1; }; }; /* end for jpatch */ }; /* end for ipatch */ }; /* end for ioperator */ } src/setup_nC.h 0 → 100644 +44 −0 Original line number Diff line number Diff line #ifndef SETUP_NC_H #define SETUP_NC_H 1 #include "dmrg_types.h" #ifdef __cplusplus extern "C" { #endif extern void setup_nC( int noperator, int npatches, /* * --------- * intent(in) * --------- */ FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], /* sizes of left patches (INPUT) */ int right_patch_size_[], /* sizes of right patches (INPUT) */ /* * -------------------------------- * intent(out) but assume storage already allocated * -------------------------------- */ IntegerType nC_[], IntegerType gnnz_A_[], IntegerType gnnz_B_[] ); #ifdef __cplusplus } #endif #endif Loading
src/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,7 @@ OBJS=\ setup_matrix.o \ setup_vbatch.o \ unsetup_vbatch.o \ setup_nC.o \ setup_sparse_batch.o \ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ Loading
src/estimate_work.c +86 −4 Original line number Diff line number Diff line #include "test_vbatch.h" void get_total_memory( int noperator, int npatches, FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], int right_patch_size_[], size_t *ptotal_memory_in_nbytes ) { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; double gmemXY = 0; size_t nbytes = 0; nbytes = sizeof(IntegerType)*npatches*npatches; IntegerType *nC_ = (IntegerType *) malloc(nbytes); assert( nC_ != 0 ); nbytes = sizeof(IntegerType)*npatches*npatches*noperator; IntegerType *gnnz_A_ = (IntegerType *) malloc( nbytes ); assert( gnnz_A_ != 0 ); nbytes = sizeof(IntegerType)*npatches*npatches*noperator; IntegerType *gnnz_B_ = (IntegerType *) malloc( nbytes ); assert( gnnz_B_ != 0 ); setup_nC( noperator, npatches, Amatrix_, ld_Amatrix_, Bmatrix_, ld_Bmatrix_, left_patch_size_, right_patch_size_, nC_, gnnz_A_, gnnz_B_ ); estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX, &gmemXY ); size_t total_memory_in_bytes = sizeof(FpType) * (gmemA + gmemB + gmemBX + gmemXY); free( nC_ ); free( gnnz_A_ ); free( gnnz_B_ ); *ptotal_memory_in_nbytes = total_memory_in_bytes; } void estimate_work( int npatches, int left_patch_size_[], int right_patch_size_[], Loading @@ -7,7 +71,8 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX ) double *pgmemBX, double *pgmemXY ) #define nC(ipatch,jpatch) nC_[ ((ipatch)-1) + ((jpatch)-1)*npatches ] #define left_patch_size(ipatch) left_patch_size_[(ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[(ipatch)-1] Loading @@ -17,6 +82,7 @@ void estimate_work( int npatches, estimate total work ------------------- */ const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; assert( ptotal_gflops != NULL ); Loading @@ -27,12 +93,24 @@ void estimate_work( int npatches, double gmemA = 0.0; double gmemB = 0.0; double gmemBX = 0.0; double gmemXY = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(ipatch=1; ipatch <= npatches; ipatch++) { gmemXY += left_patch_size(ipatch) * right_patch_size(ipatch); }; /* * ----------------------- * count both X, Y vectors * ----------------------- */ gmemXY *= 2; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { Loading @@ -56,9 +134,12 @@ void estimate_work( int npatches, int ncolB = right_patch_size(jpatch); int ncolX = ncolA; gmemA += nop * nrowA * ncolA; gmemB += nop * nrowB * ncolB; gmemBX += nop * nrowB * ncolX; int ldA = ialign * (( nrowA + (ialign-1))/ialign ); int ldB = ialign * (( nrowB + (ialign-1))/ialign ); int ldBX = ldB; gmemA += nop * ldA * ncolA; gmemB += nop * ldB * ncolB; gmemBX += nop * ldBX * ncolX; cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); Loading @@ -74,5 +155,6 @@ void estimate_work( int npatches, *pgmemA = gmemA; *pgmemB = gmemB; *pgmemBX = gmemBX; *pgmemXY = gmemXY; }
src/estimate_work.h +12 −1 Original line number Diff line number Diff line Loading @@ -13,8 +13,19 @@ void estimate_work( int npatches, double *ptotal_gflops, double *pgmemA, double *pgmemB, double *pgmemBX double *pgmemBX, double *pgmemXY ); extern void get_total_memory( int noperator, int npatches, FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], int right_patch_size_[], size_t *ptotal_memory_in_nbytes ); #ifdef __cplusplus } #endif Loading
src/setup_nC.c 0 → 100644 +147 −0 Original line number Diff line number Diff line #include <stdlib.h> #include <assert.h> #include <inttypes.h> #include <math.h> #include "dmrg_vbatch.h" #include "setup_nC.h" #ifndef MIN #define MIN(x,y) (((x) < (y))?(x):(y)) #endif #define index3(ipatch,jpatch,ioperator) \ ( ( ((ipatch)-1) + (((jpatch)-1)*(npatches)) ) + \ ( ((ioperator)-1)*((npatches)*(npatches)) ) ) #define gnnz_A(ipatch,jpatch,ioperator) gnnz_A_[index3(ipatch,jpatch,ioperator)] #define gnnz_B(ipatch,jpatch,ioperator) gnnz_B_[ index3(ipatch,jpatch,ioperator)] #define Amatrix(ipatch,jpatch,ioperator) Amatrix_[ index3(ipatch,jpatch,ioperator) ] #define Bmatrix(ipatch,jpatch,ioperator) Bmatrix_[ index3(ipatch,jpatch,ioperator) ] #define ld_Amatrix(ipatch,jpatch,ioperator) ld_Amatrix_[ index3(ipatch,jpatch,ioperator) ] #define ld_Bmatrix(ipatch,jpatch,ioperator) ld_Bmatrix_[ index3(ipatch,jpatch,ioperator) ] #define nC(i,j) nC_[ indx2f(i,j,npatches)] #define left_patch_size(ipatch) left_patch_size_[ (ipatch)-1] #define right_patch_size(ipatch) right_patch_size_[ (ipatch)-1] void setup_nC( int noperator, int npatches, /* * --------- * intent(in) * --------- */ FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], /* sizes of left patches (INPUT) */ int right_patch_size_[], /* sizes of right patches (INPUT) */ /* * -------------------------------- * intent(out) but assume storage already allocated * -------------------------------- */ IntegerType nC_[], IntegerType gnnz_A_[], IntegerType gnnz_B_[] ) { int ipatch = 0; int jpatch = 0; int ioperator = 0; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { nC(ipatch,jpatch) = 0; }; }; for(ioperator=1; ioperator <= noperator; ioperator++) { for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { #define Amat(i,j) Amat_[indx2f(i,j,ld_Amat)] #define Bmat(i,j) Bmat_[indx2f(i,j,ld_Bmat)] FpType *Amat_ = Amatrix(ipatch,jpatch,ioperator); FpType *Bmat_ = Bmatrix(ipatch,jpatch,ioperator); int ld_Amat = ld_Amatrix(ipatch,jpatch,ioperator); int ld_Bmat = ld_Bmatrix(ipatch,jpatch,ioperator); int nrowA = left_patch_size(ipatch); int ncolA = left_patch_size(jpatch); int nrowB = right_patch_size(ipatch); int ncolB = right_patch_size(jpatch); int nnz_Amat = 0; int nnz_Bmat = 0; /* -------------------------------------------------------------- Note: treat a NULL pointer as pointer to a matrix of all zeros -------------------------------------------------------------- */ if (Amat_ == NULL) { nnz_Amat = 0; } else { int ia = 0; int ja = 0; for(ja=1; ja <= ncolA; ja++) { for(ia=1; ia <= nrowA; ia++) { nnz_Amat += ((Amat(ia,ja) == 0)? 0 : 1); }; }; }; if (Bmat_ == NULL) { nnz_Bmat = 0; } else { int ib = 0; int jb = 0; for(jb=1; jb <= ncolB; jb++) { for(ib=1; ib <= nrowB; ib++) { nnz_Bmat += ((Bmat(ib,jb) == 0) ? 0 : 1); }; }; }; gnnz_A(ipatch,jpatch,ioperator) = nnz_Amat; gnnz_B(ipatch,jpatch,ioperator) = nnz_Bmat; int is_zero_Amat = (nnz_Amat == 0); int is_zero_Bmat = (nnz_Bmat == 0); if (is_zero_Amat || is_zero_Bmat) { /* --------------------------------------------------------- ignore this operator since kron(Amat,Bmat) is zero matrix --------------------------------------------------------- */ } else { /* ------------------------------------------ pair of non-zero Amat(k), Bmat(k) matrices ------------------------------------------ */ nC(ipatch,jpatch) = nC(ipatch,jpatch) + 1; }; }; /* end for jpatch */ }; /* end for ipatch */ }; /* end for ioperator */ }
src/setup_nC.h 0 → 100644 +44 −0 Original line number Diff line number Diff line #ifndef SETUP_NC_H #define SETUP_NC_H 1 #include "dmrg_types.h" #ifdef __cplusplus extern "C" { #endif extern void setup_nC( int noperator, int npatches, /* * --------- * intent(in) * --------- */ FpType **Amatrix_, int *ld_Amatrix_, FpType **Bmatrix_, int *ld_Bmatrix_, int left_patch_size_[], /* sizes of left patches (INPUT) */ int right_patch_size_[], /* sizes of right patches (INPUT) */ /* * -------------------------------- * intent(out) but assume storage already allocated * -------------------------------- */ IntegerType nC_[], IntegerType gnnz_A_[], IntegerType gnnz_B_[] ); #ifdef __cplusplus } #endif #endif