Loading src/apply_Htarget_pvbatch.c +13 −4 Original line number Diff line number Diff line Loading @@ -25,12 +25,15 @@ void apply_Htarget_pvbatch( #define Y(i) Y_[(i)-1] { const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; double gflops1 = (double) 0.0; double gflops2 = (double) 0.0; double time_1st_vbatch = (double) 0.0; double time_2nd_vbatch = (double) 0.0; size_t nbytes_BX = 0; /* ------------------ compute Y = H * X Loading Loading @@ -140,7 +143,8 @@ void apply_Htarget_pvbatch( int ld_Abatch = descAbatch_[LLD_]; int ld_Bbatch = descBbatch_[LLD_]; double *BX_ = (double *) dmrg_malloc( (sizeof(double) * ld_BX) * Locq_BX ); nbytes_BX = ( (sizeof(double) * ld_BX) * Locq_BX ); double *BX_ = (double *) dmrg_malloc( nbytes_BX ); assert( BX_ != NULL ); #define BX(i,j) BX_[ indx2f(i,j,ld_BX) ] Loading Loading @@ -287,7 +291,7 @@ void apply_Htarget_pvbatch( #ifdef _OPENMP time_1st_vbatch += omp_get_wtime(); #endif gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/(giga); Loading Loading @@ -362,14 +366,19 @@ void apply_Htarget_pvbatch( ngroups, group_size_ ); #ifdef _OPENMP time_2nd_vbatch += omp_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/(giga); printf("1st vbatch %f gflops (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %f gflops (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("apply_Htarget_pvbatch:memory BX (%f GBytes)\n", (double) nbytes_BX/(giga) ); #endif Loading src/apply_Htarget_sparse.c +18 −5 Original line number Diff line number Diff line Loading @@ -41,11 +41,16 @@ void apply_Htarget_sparse( #define Y(i) Y_[(i)-1] { const double giga = 1000.0*1000.0*1000.0; const int idebug = 1; const int ialign = 32; double total_time = -dmrg_get_wtime(); size_t nbytes_X = 0; size_t nbytes_Y = 0; size_t nbytes_BX = 0; double gflops1 = 0.0; double gflops2 = 0.0; double time_1st_vbatch = 0.0; Loading Loading @@ -104,7 +109,8 @@ void apply_Htarget_sparse( * ----------------------- */ if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); nbytes_X = sizeof(FpType) * xy_size_dim; X_ = (FpType *) dmrg_malloc( nbytes_X ); assert( X_ != NULL ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); Loading @@ -113,7 +119,8 @@ void apply_Htarget_sparse( }; if (need_allocate_Y) { Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); nbytes_Y = sizeof(FpType) * xy_size_dim; Y_ = (FpType *) dmrg_malloc( nbytes_Y ); assert( Y_ != NULL ); }; Loading Loading @@ -173,7 +180,8 @@ void apply_Htarget_sparse( #define gBXbatch(ipatch) gBXbatch_[(ipatch)-1] FpType *pBXmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_BX_sizes ); nbytes_BX = sizeof(FpType) * sum_BX_sizes; FpType *pBXmem = (FpType *) dmrg_malloc( nbytes_BX ); if (pBXmem == NULL) { printf("apply_Htarget_sparse: sum_BX_sizes=%le\n", (double) sum_BX_sizes); printf("max_nC=%d, sum_nC=%d, nnz_nC=%d\n", Loading Loading @@ -357,7 +365,7 @@ void apply_Htarget_sparse( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_1st_vbatch += dmrg_get_wtime(); gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/giga; /* Loading Loading @@ -478,7 +486,7 @@ void apply_Htarget_sparse( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_2nd_vbatch += dmrg_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/giga; if (idebug >= 1) { printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", Loading @@ -488,6 +496,7 @@ void apply_Htarget_sparse( printf("overall %lf gflops/sec\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); }; Loading Loading @@ -519,6 +528,10 @@ void apply_Htarget_sparse( if (idebug >= 1) { printf("apply_Htarget_sparse: total_time=%lf \n", total_time); printf("apply_Htarget_sparse:memory BX (%f GBytes) X (%f GBytes) Y (%f GBytes) \n", (double) nbytes_BX/(giga), (double) nbytes_X/(giga), (double) nbytes_Y/(giga)); }; } Loading src/apply_Htarget_vbatch.c +9 −3 Original line number Diff line number Diff line Loading @@ -27,12 +27,15 @@ void apply_Htarget_vbatch( { const int idebug = 1; const int ialign = 32; const double giga = 1000.0*1000.0*1000.0; double gflops1 = (FpType) 0.0; double gflops2 = (FpType) 0.0; double time_1st_vbatch = (FpType) 0.0; double time_2nd_vbatch = (FpType) 0.0; size_t nbytes_BX = 0; /* ------------------ compute Y = H * X Loading Loading @@ -109,7 +112,8 @@ void apply_Htarget_vbatch( int ncolBX = (ncolA * noperator ); int ld_BX = ialign * ICEIL(nrowBX,ialign); FpType *BX_ = (FpType *) dmrg_malloc( (sizeof(FpType) * ld_BX) * (ncolA * noperator) ); nbytes_BX = ( (sizeof(FpType) * ld_BX) * (ncolA * noperator) ); FpType *BX_ = (FpType *) dmrg_malloc( nbytes_BX ); assert( BX_ != NULL ); #define BX(i,j) BX_[ indx2f(i,j,ld_BX) ] Loading Loading @@ -196,7 +200,7 @@ void apply_Htarget_vbatch( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_1st_vbatch += dmrg_get_wtime(); gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/(giga); Loading Loading @@ -268,7 +272,7 @@ void apply_Htarget_vbatch( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_2nd_vbatch += dmrg_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/(giga); if (idebug >= 1) { printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", Loading @@ -278,6 +282,8 @@ void apply_Htarget_vbatch( printf("overall %lf gflops/sec\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("memory BX(%lf GBytes)\n", (double) nbytes_BX/(giga) ); }; Loading src/dmrg_vbatch.c +45 −16 Original line number Diff line number Diff line Loading @@ -124,9 +124,13 @@ void dmrg_Xgemm_vbatch( char ctransa_array[], { const int idebug = 0; const double giga = 1000.0*1000.0*1000.0; double gflops = 0; double elapsed_time = 0; size_t nbytes = 0; size_t nbytes_total = 0; if (idebug >= 1) { elapsed_time = -dmrg_get_wtime(); int igroup = 0; Loading @@ -139,7 +143,7 @@ double elapsed_time = 0; ( (double) group_size[igroup]) * 2.0; }; gflops = gflops/(1000.0*1000.0*1000.0); gflops = gflops/(giga); }; Loading Loading @@ -215,21 +219,44 @@ double elapsed_time = 0; const int ialign = 32; int vbatch_dim = ialign * ICEIL( (batch_size+1),ialign ); #ifdef USE_MALLOC FpType *alpha_vbatch = (FpType *) dmrg_malloc( sizeof(FpType) * (vbatch_dim)); FpType *beta_vbatch = (FpType *) dmrg_malloc( sizeof(FpType) * (vbatch_dim)); int *m_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); int *n_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); int *k_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); char *transa_vbatch = (char *) dmrg_malloc(sizeof(char) *(vbatch_dim)); char *transb_vbatch = (char *) dmrg_malloc(sizeof(char) *(vbatch_dim)); int *lda_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); int *ldb_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); int *ldc_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); FpType **a_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); FpType **b_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); FpType **c_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); nbytes = sizeof(FpType) * (vbatch_dim); nbytes_total += nbytes; FpType *alpha_vbatch = (FpType *) dmrg_malloc( nbytes ); nbytes = sizeof(FpType) * (vbatch_dim); nbytes_total += nbytes; FpType *beta_vbatch = (FpType *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *m_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *n_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *k_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(char) *(vbatch_dim); nbytes_total += nbytes; char *transa_vbatch = (char *) dmrg_malloc( nbytes ); nbytes = sizeof(char) *(vbatch_dim); nbytes_total += nbytes; char *transb_vbatch = (char *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *lda_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *ldb_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *ldc_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **a_vbatch = (FpType **) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **b_vbatch = (FpType **) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **c_vbatch = (FpType **) dmrg_malloc( nbytes ); assert( alpha_vbatch != NULL ); assert( beta_vbatch != NULL ); Loading Loading @@ -472,6 +499,8 @@ double elapsed_time = 0; printf("dmrg_vbatch: gflops=%lf, elapsed_time=%lf, gflops/sec=%lf\n", gflops, elapsed_time, gflops_per_sec ); printf("dmrg_vbatch need %lf GBytes\n", (double) nbytes_total/(giga) ); }; } src/setup_sparse_batch.c +12 −2 Original line number Diff line number Diff line Loading @@ -80,9 +80,13 @@ void setup_sparse_batch( const int false = 0; const int true = !false; const int use_Xlacpy = true ; const double giga = 1000.0*1000.0*1000.0; double total_time = -dmrg_get_wtime(); size_t nbytes_Abatch = 0; size_t nbytes_Bbatch = 0; int ipatch = 0; int gnnz_A_[npatches*npatches*noperator]; Loading Loading @@ -341,8 +345,10 @@ void setup_sparse_batch( assert( gAbatch_ != NULL ); assert( gBbatch_ != NULL ); FpType *pAmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_Abatch_sizes); FpType *pBmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_Bbatch_sizes); nbytes_Abatch = sizeof(FpType) * sum_Abatch_sizes; nbytes_Bbatch = sizeof(FpType) * sum_Bbatch_sizes; FpType *pAmem = (FpType *) dmrg_malloc( nbytes_Abatch ); FpType *pBmem = (FpType *) dmrg_malloc( nbytes_Bbatch ); assert( pAmem != NULL ); assert( pBmem != NULL ); { Loading Loading @@ -472,6 +478,10 @@ void setup_sparse_batch( if (idebug >= 1) { printf("setup_sparse_batch: total_time = %lf \n", total_time ); printf("setup_sparse_batch:memory Abatch (%lf GBytes) Bbatch (%lf GBytes)\n", (double) nbytes_Abatch/(giga), (double) nbytes_Bbatch/(giga) ); }; } Loading Loading
src/apply_Htarget_pvbatch.c +13 −4 Original line number Diff line number Diff line Loading @@ -25,12 +25,15 @@ void apply_Htarget_pvbatch( #define Y(i) Y_[(i)-1] { const int ialign = 32; const double giga = 1000.0 * 1000.0 * 1000.0; double gflops1 = (double) 0.0; double gflops2 = (double) 0.0; double time_1st_vbatch = (double) 0.0; double time_2nd_vbatch = (double) 0.0; size_t nbytes_BX = 0; /* ------------------ compute Y = H * X Loading Loading @@ -140,7 +143,8 @@ void apply_Htarget_pvbatch( int ld_Abatch = descAbatch_[LLD_]; int ld_Bbatch = descBbatch_[LLD_]; double *BX_ = (double *) dmrg_malloc( (sizeof(double) * ld_BX) * Locq_BX ); nbytes_BX = ( (sizeof(double) * ld_BX) * Locq_BX ); double *BX_ = (double *) dmrg_malloc( nbytes_BX ); assert( BX_ != NULL ); #define BX(i,j) BX_[ indx2f(i,j,ld_BX) ] Loading Loading @@ -287,7 +291,7 @@ void apply_Htarget_pvbatch( #ifdef _OPENMP time_1st_vbatch += omp_get_wtime(); #endif gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/(giga); Loading Loading @@ -362,14 +366,19 @@ void apply_Htarget_pvbatch( ngroups, group_size_ ); #ifdef _OPENMP time_2nd_vbatch += omp_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/(giga); printf("1st vbatch %f gflops (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %f gflops (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("apply_Htarget_pvbatch:memory BX (%f GBytes)\n", (double) nbytes_BX/(giga) ); #endif Loading
src/apply_Htarget_sparse.c +18 −5 Original line number Diff line number Diff line Loading @@ -41,11 +41,16 @@ void apply_Htarget_sparse( #define Y(i) Y_[(i)-1] { const double giga = 1000.0*1000.0*1000.0; const int idebug = 1; const int ialign = 32; double total_time = -dmrg_get_wtime(); size_t nbytes_X = 0; size_t nbytes_Y = 0; size_t nbytes_BX = 0; double gflops1 = 0.0; double gflops2 = 0.0; double time_1st_vbatch = 0.0; Loading Loading @@ -104,7 +109,8 @@ void apply_Htarget_sparse( * ----------------------- */ if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); nbytes_X = sizeof(FpType) * xy_size_dim; X_ = (FpType *) dmrg_malloc( nbytes_X ); assert( X_ != NULL ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); Loading @@ -113,7 +119,8 @@ void apply_Htarget_sparse( }; if (need_allocate_Y) { Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); nbytes_Y = sizeof(FpType) * xy_size_dim; Y_ = (FpType *) dmrg_malloc( nbytes_Y ); assert( Y_ != NULL ); }; Loading Loading @@ -173,7 +180,8 @@ void apply_Htarget_sparse( #define gBXbatch(ipatch) gBXbatch_[(ipatch)-1] FpType *pBXmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_BX_sizes ); nbytes_BX = sizeof(FpType) * sum_BX_sizes; FpType *pBXmem = (FpType *) dmrg_malloc( nbytes_BX ); if (pBXmem == NULL) { printf("apply_Htarget_sparse: sum_BX_sizes=%le\n", (double) sum_BX_sizes); printf("max_nC=%d, sum_nC=%d, nnz_nC=%d\n", Loading Loading @@ -357,7 +365,7 @@ void apply_Htarget_sparse( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_1st_vbatch += dmrg_get_wtime(); gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/giga; /* Loading Loading @@ -478,7 +486,7 @@ void apply_Htarget_sparse( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_2nd_vbatch += dmrg_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/giga; if (idebug >= 1) { printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", Loading @@ -488,6 +496,7 @@ void apply_Htarget_sparse( printf("overall %lf gflops/sec\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); }; Loading Loading @@ -519,6 +528,10 @@ void apply_Htarget_sparse( if (idebug >= 1) { printf("apply_Htarget_sparse: total_time=%lf \n", total_time); printf("apply_Htarget_sparse:memory BX (%f GBytes) X (%f GBytes) Y (%f GBytes) \n", (double) nbytes_BX/(giga), (double) nbytes_X/(giga), (double) nbytes_Y/(giga)); }; } Loading
src/apply_Htarget_vbatch.c +9 −3 Original line number Diff line number Diff line Loading @@ -27,12 +27,15 @@ void apply_Htarget_vbatch( { const int idebug = 1; const int ialign = 32; const double giga = 1000.0*1000.0*1000.0; double gflops1 = (FpType) 0.0; double gflops2 = (FpType) 0.0; double time_1st_vbatch = (FpType) 0.0; double time_2nd_vbatch = (FpType) 0.0; size_t nbytes_BX = 0; /* ------------------ compute Y = H * X Loading Loading @@ -109,7 +112,8 @@ void apply_Htarget_vbatch( int ncolBX = (ncolA * noperator ); int ld_BX = ialign * ICEIL(nrowBX,ialign); FpType *BX_ = (FpType *) dmrg_malloc( (sizeof(FpType) * ld_BX) * (ncolA * noperator) ); nbytes_BX = ( (sizeof(FpType) * ld_BX) * (ncolA * noperator) ); FpType *BX_ = (FpType *) dmrg_malloc( nbytes_BX ); assert( BX_ != NULL ); #define BX(i,j) BX_[ indx2f(i,j,ld_BX) ] Loading Loading @@ -196,7 +200,7 @@ void apply_Htarget_vbatch( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_1st_vbatch += dmrg_get_wtime(); gflops1 = gflops1/(1000.0*1000.0*1000.0); gflops1 = gflops1/(giga); Loading Loading @@ -268,7 +272,7 @@ void apply_Htarget_vbatch( beta_array_, c_array_, ldc_array_, ngroups, group_size_ ); time_2nd_vbatch += dmrg_get_wtime(); gflops2 = gflops2/(1000.0*1000.0*1000.0); gflops2 = gflops2/(giga); if (idebug >= 1) { printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", Loading @@ -278,6 +282,8 @@ void apply_Htarget_vbatch( printf("overall %lf gflops/sec\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("memory BX(%lf GBytes)\n", (double) nbytes_BX/(giga) ); }; Loading
src/dmrg_vbatch.c +45 −16 Original line number Diff line number Diff line Loading @@ -124,9 +124,13 @@ void dmrg_Xgemm_vbatch( char ctransa_array[], { const int idebug = 0; const double giga = 1000.0*1000.0*1000.0; double gflops = 0; double elapsed_time = 0; size_t nbytes = 0; size_t nbytes_total = 0; if (idebug >= 1) { elapsed_time = -dmrg_get_wtime(); int igroup = 0; Loading @@ -139,7 +143,7 @@ double elapsed_time = 0; ( (double) group_size[igroup]) * 2.0; }; gflops = gflops/(1000.0*1000.0*1000.0); gflops = gflops/(giga); }; Loading Loading @@ -215,21 +219,44 @@ double elapsed_time = 0; const int ialign = 32; int vbatch_dim = ialign * ICEIL( (batch_size+1),ialign ); #ifdef USE_MALLOC FpType *alpha_vbatch = (FpType *) dmrg_malloc( sizeof(FpType) * (vbatch_dim)); FpType *beta_vbatch = (FpType *) dmrg_malloc( sizeof(FpType) * (vbatch_dim)); int *m_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); int *n_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); int *k_vbatch = (int *) dmrg_malloc(sizeof(int) * (vbatch_dim)); char *transa_vbatch = (char *) dmrg_malloc(sizeof(char) *(vbatch_dim)); char *transb_vbatch = (char *) dmrg_malloc(sizeof(char) *(vbatch_dim)); int *lda_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); int *ldb_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); int *ldc_vbatch = (int *) dmrg_malloc(sizeof(int)*(vbatch_dim)); FpType **a_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); FpType **b_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); FpType **c_vbatch = (FpType **) dmrg_malloc( sizeof(FpType *) * (vbatch_dim)); nbytes = sizeof(FpType) * (vbatch_dim); nbytes_total += nbytes; FpType *alpha_vbatch = (FpType *) dmrg_malloc( nbytes ); nbytes = sizeof(FpType) * (vbatch_dim); nbytes_total += nbytes; FpType *beta_vbatch = (FpType *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *m_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *n_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int) * (vbatch_dim); nbytes_total += nbytes; int *k_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(char) *(vbatch_dim); nbytes_total += nbytes; char *transa_vbatch = (char *) dmrg_malloc( nbytes ); nbytes = sizeof(char) *(vbatch_dim); nbytes_total += nbytes; char *transb_vbatch = (char *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *lda_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *ldb_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(int)*(vbatch_dim); nbytes_total += nbytes; int *ldc_vbatch = (int *) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **a_vbatch = (FpType **) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **b_vbatch = (FpType **) dmrg_malloc( nbytes ); nbytes = sizeof(FpType *) * (vbatch_dim); nbytes_total += nbytes; FpType **c_vbatch = (FpType **) dmrg_malloc( nbytes ); assert( alpha_vbatch != NULL ); assert( beta_vbatch != NULL ); Loading Loading @@ -472,6 +499,8 @@ double elapsed_time = 0; printf("dmrg_vbatch: gflops=%lf, elapsed_time=%lf, gflops/sec=%lf\n", gflops, elapsed_time, gflops_per_sec ); printf("dmrg_vbatch need %lf GBytes\n", (double) nbytes_total/(giga) ); }; }
src/setup_sparse_batch.c +12 −2 Original line number Diff line number Diff line Loading @@ -80,9 +80,13 @@ void setup_sparse_batch( const int false = 0; const int true = !false; const int use_Xlacpy = true ; const double giga = 1000.0*1000.0*1000.0; double total_time = -dmrg_get_wtime(); size_t nbytes_Abatch = 0; size_t nbytes_Bbatch = 0; int ipatch = 0; int gnnz_A_[npatches*npatches*noperator]; Loading Loading @@ -341,8 +345,10 @@ void setup_sparse_batch( assert( gAbatch_ != NULL ); assert( gBbatch_ != NULL ); FpType *pAmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_Abatch_sizes); FpType *pBmem = (FpType *) dmrg_malloc( sizeof(FpType) * sum_Bbatch_sizes); nbytes_Abatch = sizeof(FpType) * sum_Abatch_sizes; nbytes_Bbatch = sizeof(FpType) * sum_Bbatch_sizes; FpType *pAmem = (FpType *) dmrg_malloc( nbytes_Abatch ); FpType *pBmem = (FpType *) dmrg_malloc( nbytes_Bbatch ); assert( pAmem != NULL ); assert( pBmem != NULL ); { Loading Loading @@ -472,6 +478,10 @@ void setup_sparse_batch( if (idebug >= 1) { printf("setup_sparse_batch: total_time = %lf \n", total_time ); printf("setup_sparse_batch:memory Abatch (%lf GBytes) Bbatch (%lf GBytes)\n", (double) nbytes_Abatch/(giga), (double) nbytes_Bbatch/(giga) ); }; } Loading