src/apply_Htarget_sparse.c  +26 −11

@@ -50,6 +50,10 @@ void apply_Htarget_sparse(
   FpType *X_ = &(Xin_[0]);
   FpType *Y_ = &(Yout_[0]);
+#ifdef USE_MAGMA
+  int need_allocate_X = !dmrg_is_managed( Xin_ );
+  int need_allocate_Y = !dmrg_is_managed( Yout_ );
+#endif

   /*
    * ------------------

@@ -78,23 +82,29 @@ void apply_Htarget_sparse(
     right_patch_size(ipatch) = R2 - R1 + 1;
   };

-  long Xsize = 0;
+#ifdef USE_MAGMA
+  long xy_size = 0;
   for(ipatch=1; ipatch <= npatches; ipatch++) {
-    Xsize += left_patch_size(ipatch) * right_patch_size(ipatch);
+    xy_size += left_patch_size(ipatch) * right_patch_size(ipatch);
   };
+  long xy_size_dim = ialign * ICEIL( xy_size, ialign );

-#ifdef MAGMA
   /*
    * -----------------------
    * allocate unified memory
    * -----------------------
    */
-  X_ = (FpType *) dmrg_malloc( sizeof(FpType) * Xsize );
-  Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * Xsize );
+  if (need_allocate_X) {
+    X_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
+    assert( X_ != NULL );
+    memcpy( &(X_[0]), &(Xin_[0]), sizeof(FpType) * xy_size );
+  };
+  if (need_allocate_Y) {
+    Y_ = (FpType *) dmrg_malloc( sizeof(FpType) * xy_size_dim );
+    assert( Y_ != NULL );
+  };

-  memcpy( &(X_[0]), &(Xin_[0]), nbytes );
 #endif

@@ -480,9 +490,14 @@ void apply_Htarget_sparse(
    * free unified memory
    * -------------------
    */
-  memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * Xsize );
+  if (need_allocate_X) {
+    dmrg_free( X_ ); X_ = NULL;
+  };
+  if (need_allocate_Y) {
+    memcpy( &(Yout_[0]), &(Y_[0]), sizeof(FpType) * xy_size );
+    dmrg_free( Y_ ); Y_ = NULL;
+  };
 #endif
 }
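The second hunk now allocates unified memory only when the incoming buffers are not already managed, and it pads the patch-vector length up to a multiple of ialign before calling dmrg_malloc(). Below is a minimal, standalone sketch of that rounding step; the ICEIL definition and the ialign value are assumptions for illustration (neither appears in this diff), and the sketch is not the project's code.

/* Sketch of the padded-allocation arithmetic used in the hunk above.
 * Assumption: ICEIL(n, d) is the usual round-up integer division.
 * Assumption: ialign = 32 is a placeholder alignment, not taken from the source.
 */
#include <stdio.h>

#define ICEIL(n, d) ( ((n) + (d) - 1) / (d) )   /* ceil(n/d) for positive integers */

int main(void)
{
    const long ialign  = 32;      /* placeholder alignment */
    const long xy_size = 1000;    /* e.g. the sum of left_patch_size * right_patch_size */

    /* round the element count up to the next multiple of ialign */
    long xy_size_dim = ialign * ICEIL( xy_size, ialign );

    printf("xy_size = %ld, padded xy_size_dim = %ld\n", xy_size, xy_size_dim);
    /* prints: xy_size = 1000, padded xy_size_dim = 1024 */
    return 0;
}

The need_allocate_X / need_allocate_Y flags computed in the first hunk then decide whether the padded buffers are allocated and copied at all, or whether the caller's already-managed Xin_ / Yout_ buffers are used directly.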
src/dmrg_malloc.c  +16 −0

@@ -6,6 +6,22 @@
 #include "cuda_runtime.h"
 #endif

+int dmrg_is_managed( const void *ptr )
+{
+  const int false = (0 == 1);
+  int is_managed = false;
+#ifdef USE_MAGMA
+  struct cudaPointerAttributes attribute;
+  cudaError_t ierr = cudaPointerGetAttributes( &attribute, ptr );
+  assert( ierr == cudaSuccess );
+
+  is_managed = attribute.isManaged;
+#endif
+  return( is_managed );
+}
+
 void *dmrg_malloc(const size_t alloc_size )
 {
   void *ptr = NULL;
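For reference, here is a standalone sketch of the managed-memory test that dmrg_is_managed() performs, written against the CUDA 11+ runtime, where cudaPointerAttributes exposes a type field rather than the older isManaged flag used above. The helper name is_managed_ptr and the file name are mine, not the project's.

/* Sketch: detecting cudaMallocManaged buffers with cudaPointerGetAttributes().
 * Build (illustrative): nvcc is_managed.cu -o is_managed
 */
#include <assert.h>
#include <stdio.h>
#include <cuda_runtime.h>

/* Returns 1 if ptr was allocated with cudaMallocManaged, 0 otherwise. */
static int is_managed_ptr( const void *ptr )
{
    struct cudaPointerAttributes attr;
    cudaError_t err = cudaPointerGetAttributes( &attr, ptr );
    assert( err == cudaSuccess );
    /* CUDA 11+: plain host pointers report cudaMemoryTypeUnregistered
     * rather than an error, so the assert above holds for them too. */
    return ( attr.type == cudaMemoryTypeManaged );
}

int main(void)
{
    double  host_buf[4] = {0};
    double *managed_buf = NULL;
    cudaError_t err = cudaMallocManaged( (void **) &managed_buf,
                                         4 * sizeof(double), cudaMemAttachGlobal );
    assert( err == cudaSuccess );

    printf("stack buffer managed?            %d\n", is_managed_ptr( host_buf ));    /* 0 */
    printf("cudaMallocManaged buffer managed? %d\n", is_managed_ptr( managed_buf )); /* 1 */

    cudaFree( managed_buf );
    return 0;
}

On pre-11 toolkits, which still expose attribute.isManaged as used in the diff above, cudaPointerGetAttributes() instead returns cudaErrorInvalidValue for ordinary host pointers that were never registered with CUDA, so behaviour around the assert differs between toolkit versions.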
src/dmrg_vbatch.h  +2 −0

@@ -46,6 +46,8 @@
 extern void dmrg_init();

+extern int dmrg_is_managed( const void *ptr );
+
 extern void *dmrg_malloc( const size_t alloc_size );