Loading src/apply_Htarget_sparse.c +8 −2 Original line number Diff line number Diff line Loading @@ -103,7 +103,10 @@ void apply_Htarget_sparse( if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); assert( X_ != NULL ); memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); }; if (need_allocate_Y) { Loading Loading @@ -501,7 +504,10 @@ void apply_Htarget_sparse( dmrg_free( X_ ); X_ = NULL; }; if (need_allocate_Y) { memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size ); void *dest = &(Yout_[0]); void *src = &(Y_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); dmrg_free( Y_ ); Y_ = NULL; }; #endif Loading src/dmrg_malloc.c +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr ) return( is_managed ); } void dmrg_memcpy(void *dest, const void *src, size_t count) { #ifdef USE_MAGMA cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault ); if (istat != cudaSuccess) { fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat)); }; assert( istat == cudaSuccess ); #else memcpy( dest, src, count ); #endif } void *dmrg_malloc(const size_t alloc_size ) { Loading src/dmrg_vbatch.h +2 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,8 @@ extern "C" { extern void dmrg_init(); extern void dmrg_memcpy(void *dest, const void *src, size_t n); extern int dmrg_is_managed( const void *ptr ); Loading src/test_vbatch.c +38 −44 Original line number Diff line number Diff line Loading @@ -10,7 +10,6 @@ #include <omp.h> #endif #include "analysis.h" Loading Loading @@ -150,41 +149,6 @@ int main(int argc, char *argv[]) xy_size += (nrowX * ncolX ); }; } /* ------------------- estimate total work ------------------- */ double total_flops_method1 = 0.0; double total_flops_method2 = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { double flops_total = 0.0; double flops_method1 = 0.0; double flops_method2 = 0.0; int nrowA = left_patch_size(ipatch); int ncolA = left_patch_size(jpatch); int nrowB = right_patch_size(ipatch); int ncolB = right_patch_size(jpatch); cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); total_flops += flops_total; total_flops_method1 += flops_method1; total_flops_method2 += flops_method2; }; }; }; total_flops *= noperator; total_flops_method1 *= noperator; total_flops_method2 *= noperator; Loading @@ -194,15 +158,7 @@ int main(int argc, char *argv[]) keep_left_states, keep_right_states, noperator, (long int) xy_size ); printf("npatches=%d\n", npatches ); { double total_gflops = total_flops/(1000.0*1000.0*1000.0); double total_gflops_method1 = total_flops_method1 / (1000.0*1000.0*1000.0); double total_gflops_method2 = total_flops_method2 / (1000.0*1000.0*1000.0); printf("total_gflops=%lf, total_gflops_method1=%lf, total_gflops_method2=%lf\n", total_gflops, total_gflops_method1, total_gflops_method2 ); }; if (idebug >= 1) { Loading Loading @@ -271,6 +227,14 @@ int main(int argc, char *argv[]) } else { nC_ = (int *) malloc( sizeof(int) * npatches * npatches ); assert( nC_ != NULL ); int i = 0; for(i=0; i < npatches*npatches; i++) { nC_[i] = noperator; }; setup_vbatch( noperator, npatches, &(left_patch_size_[0]), Loading @@ -284,6 +248,36 @@ int main(int argc, char *argv[]) Amatrix, ld_Amatrix, Bmatrix, ld_Bmatrix ); }; { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX ); double giga = 1000.0*1000.0*1000.0; double gmemA_gbytes = gmemA * sizeof(double)/giga; double gmemB_gbytes = gmemB * sizeof(double)/giga; double gmemBX_gbytes = gmemBX * sizeof(double)/giga; printf("total_gflops=%lf \n", total_gflops ); printf("memory for Amat=%lf GBytes\n", gmemA_gbytes ); printf("memory for Bmat=%lf GBytes\n", gmemB_gbytes ); printf("memory for BXmat=%lf GBytes\n", gmemBX_gbytes ); } #define Abatch(i,j) Abatch_[indx2f(i,j,ld_Abatch)] #define Bbatch(i,j) Bbatch_[indx2f(i,j,ld_Bbatch)] Loading src/test_vbatch.h +2 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ #include <math.h> #include <assert.h> #include "analysis.h" #include "dmrg_types.h" #include "dmrg_vbatch.h" Loading @@ -12,5 +13,6 @@ #include "setup_sparse_batch.h" #include "setup_matrix.h" #include "estimate_work.h" #endif Loading
src/apply_Htarget_sparse.c +8 −2 Original line number Diff line number Diff line Loading @@ -103,7 +103,10 @@ void apply_Htarget_sparse( if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); assert( X_ != NULL ); memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); }; if (need_allocate_Y) { Loading Loading @@ -501,7 +504,10 @@ void apply_Htarget_sparse( dmrg_free( X_ ); X_ = NULL; }; if (need_allocate_Y) { memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size ); void *dest = &(Yout_[0]); void *src = &(Y_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); dmrg_free( Y_ ); Y_ = NULL; }; #endif Loading
src/dmrg_malloc.c +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr ) return( is_managed ); } void dmrg_memcpy(void *dest, const void *src, size_t count) { #ifdef USE_MAGMA cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault ); if (istat != cudaSuccess) { fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat)); }; assert( istat == cudaSuccess ); #else memcpy( dest, src, count ); #endif } void *dmrg_malloc(const size_t alloc_size ) { Loading
src/dmrg_vbatch.h +2 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,8 @@ extern "C" { extern void dmrg_init(); extern void dmrg_memcpy(void *dest, const void *src, size_t n); extern int dmrg_is_managed( const void *ptr ); Loading
src/test_vbatch.c +38 −44 Original line number Diff line number Diff line Loading @@ -10,7 +10,6 @@ #include <omp.h> #endif #include "analysis.h" Loading Loading @@ -150,41 +149,6 @@ int main(int argc, char *argv[]) xy_size += (nrowX * ncolX ); }; } /* ------------------- estimate total work ------------------- */ double total_flops_method1 = 0.0; double total_flops_method2 = 0.0; double total_flops = 0.0; { int ipatch = 0; int jpatch = 0; for(jpatch=1; jpatch <= npatches; jpatch++) { for(ipatch=1; ipatch <= npatches; ipatch++) { double flops_total = 0.0; double flops_method1 = 0.0; double flops_method2 = 0.0; int nrowA = left_patch_size(ipatch); int ncolA = left_patch_size(jpatch); int nrowB = right_patch_size(ipatch); int ncolB = right_patch_size(jpatch); cal_kron_flops( nrowA, nrowB, ncolA, ncolB, &flops_total, &flops_method1, &flops_method2); total_flops += flops_total; total_flops_method1 += flops_method1; total_flops_method2 += flops_method2; }; }; }; total_flops *= noperator; total_flops_method1 *= noperator; total_flops_method2 *= noperator; Loading @@ -194,15 +158,7 @@ int main(int argc, char *argv[]) keep_left_states, keep_right_states, noperator, (long int) xy_size ); printf("npatches=%d\n", npatches ); { double total_gflops = total_flops/(1000.0*1000.0*1000.0); double total_gflops_method1 = total_flops_method1 / (1000.0*1000.0*1000.0); double total_gflops_method2 = total_flops_method2 / (1000.0*1000.0*1000.0); printf("total_gflops=%lf, total_gflops_method1=%lf, total_gflops_method2=%lf\n", total_gflops, total_gflops_method1, total_gflops_method2 ); }; if (idebug >= 1) { Loading Loading @@ -271,6 +227,14 @@ int main(int argc, char *argv[]) } else { nC_ = (int *) malloc( sizeof(int) * npatches * npatches ); assert( nC_ != NULL ); int i = 0; for(i=0; i < npatches*npatches; i++) { nC_[i] = noperator; }; setup_vbatch( noperator, npatches, &(left_patch_size_[0]), Loading @@ -284,6 +248,36 @@ int main(int argc, char *argv[]) Amatrix, ld_Amatrix, Bmatrix, ld_Bmatrix ); }; { double total_gflops = 0; double gmemA = 0; double gmemB = 0; double gmemBX = 0; estimate_work( npatches, left_patch_size_, right_patch_size_, nC_, &total_gflops, &gmemA, &gmemB, &gmemBX ); double giga = 1000.0*1000.0*1000.0; double gmemA_gbytes = gmemA * sizeof(double)/giga; double gmemB_gbytes = gmemB * sizeof(double)/giga; double gmemBX_gbytes = gmemBX * sizeof(double)/giga; printf("total_gflops=%lf \n", total_gflops ); printf("memory for Amat=%lf GBytes\n", gmemA_gbytes ); printf("memory for Bmat=%lf GBytes\n", gmemB_gbytes ); printf("memory for BXmat=%lf GBytes\n", gmemBX_gbytes ); } #define Abatch(i,j) Abatch_[indx2f(i,j,ld_Abatch)] #define Bbatch(i,j) Bbatch_[indx2f(i,j,ld_Bbatch)] Loading
src/test_vbatch.h +2 −0 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ #include <math.h> #include <assert.h> #include "analysis.h" #include "dmrg_types.h" #include "dmrg_vbatch.h" Loading @@ -12,5 +13,6 @@ #include "setup_sparse_batch.h" #include "setup_matrix.h" #include "estimate_work.h" #endif