Loading src/BatchedGemm.h +1 −0 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ #include <stdexcept> #include "dmrg_types.h" #include "estimate_work.h" template<typename T> class BatchedGemm { Loading src/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ OBJS=\ setup_sparse_batch.o \ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ estimate_work.o \ BatchedGemm.o libdmrgppPluginSc.a: $(OBJS) Loading src/apply_Htarget_sparse.c +8 −2 Original line number Diff line number Diff line Loading @@ -103,7 +103,10 @@ void apply_Htarget_sparse( if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); assert( X_ != NULL ); memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); }; if (need_allocate_Y) { Loading Loading @@ -501,7 +504,10 @@ void apply_Htarget_sparse( dmrg_free( X_ ); X_ = NULL; }; if (need_allocate_Y) { memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size ); void *dest = &(Yout_[0]); void *src = &(Y_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); dmrg_free( Y_ ); Y_ = NULL; }; #endif Loading src/cal_kron_flops.c +2 −1 Original line number Diff line number Diff line Loading @@ -23,7 +23,8 @@ void cal_kron_flops( double flops_BX = (2.0*nrowB) * ncolB * ncolX; int ncolBX = ncolX; double flops_BX_At = (2.0*nrowY) * ncolY * ncolBX; int nrowBX = nrowB; double flops_BX_At = (2.0*nrowBX) * ncolBX * nrowA; double flops_method1 = flops_BX + flops_BX_At; /* Loading src/dmrg_malloc.c +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr ) return( is_managed ); } void dmrg_memcpy(void *dest, const void *src, size_t count) { #ifdef USE_MAGMA cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault ); if (istat != cudaSuccess) { fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat)); }; assert( istat == cudaSuccess ); #else memcpy( dest, src, count ); #endif } void *dmrg_malloc(const size_t alloc_size ) { Loading Loading
src/BatchedGemm.h +1 −0 Original line number Diff line number Diff line Loading @@ -3,6 +3,7 @@ #include <stdexcept> #include "dmrg_types.h" #include "estimate_work.h" template<typename T> class BatchedGemm { Loading
src/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ OBJS=\ setup_sparse_batch.o \ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ estimate_work.o \ BatchedGemm.o libdmrgppPluginSc.a: $(OBJS) Loading
src/apply_Htarget_sparse.c +8 −2 Original line number Diff line number Diff line Loading @@ -103,7 +103,10 @@ void apply_Htarget_sparse( if (need_allocate_X) { X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim ); assert( X_ != NULL ); memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size ); void *dest = (void *) &(X_[0]); void *src = (void *) &(Xin_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); }; if (need_allocate_Y) { Loading Loading @@ -501,7 +504,10 @@ void apply_Htarget_sparse( dmrg_free( X_ ); X_ = NULL; }; if (need_allocate_Y) { memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size ); void *dest = &(Yout_[0]); void *src = &(Y_[0]); size_t count = sizeof(FpType) * xy_size; dmrg_memcpy( dest, src, count ); dmrg_free( Y_ ); Y_ = NULL; }; #endif Loading
src/cal_kron_flops.c +2 −1 Original line number Diff line number Diff line Loading @@ -23,7 +23,8 @@ void cal_kron_flops( double flops_BX = (2.0*nrowB) * ncolB * ncolX; int ncolBX = ncolX; double flops_BX_At = (2.0*nrowY) * ncolY * ncolBX; int nrowBX = nrowB; double flops_BX_At = (2.0*nrowBX) * ncolBX * nrowA; double flops_method1 = flops_BX + flops_BX_At; /* Loading
src/dmrg_malloc.c +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,18 @@ int dmrg_is_managed( const void *ptr ) return( is_managed ); } void dmrg_memcpy(void *dest, const void *src, size_t count) { #ifdef USE_MAGMA cudaError_t istat = cudaMemcpy( dest, src, count, cudaMemcpyDefault ); if (istat != cudaSuccess) { fprintf(stderr,"dmrg_memcpy: %s\n", cudaGetErrorString(istat)); }; assert( istat == cudaSuccess ); #else memcpy( dest, src, count ); #endif } void *dmrg_malloc(const size_t alloc_size ) { Loading