Loading README.md +60 −0 Original line number Diff line number Diff line # DMRG++PluginSc ## How to build Download and build PsimagLite ```BASH $ git clone https://code.ornl.gov/gonzalo_3/PsimagLite $ cd PsimagLite/lib $ git checkout features $ ./configure.pl # to take advantage of 4 CPU cores $ make -j 4 ``` Download and build DMRG++PluginSc (this repo) ```BASH $ git clone https://code.ornl.gov/e6d/dmrgppPluginSc $ cd dmrgppPluginSc/src # create a make.inc file by modifying one of the existing ones # for example to take advantage of OpenMP on multiple CPU cores # cp make.inc.openmp make.inc # or take advantage of MAGMA library on GPU # cp make.inc.magma make.inc $ cp make.inc.openmp make.inc $ make -j 4 ``` Download DMRG++ ```BASH $ cd ../../ $ git clone https://code.ornl.gov/gonzalo_3/dmrgpp $ cd dmrgpp/ $ git checkout features $ cd src ``` Create a file myconfig.psiTag with the following content ``` flavor myflavor = ( < flavor production < dependency PluginSc ) default flavor = myflavor ``` and then ```BASH $ ./configure.pl -c myconfig.psiTag $ make -j 4 ``` To DMRG++ inputs, please add BatchedGemm to SolverOptions, like ``` SolverOptions=BatchedGemm,... 
``` src/BatchedGemm.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -9,7 +9,10 @@ #include <math.h> #include <stdio.h> #ifdef SHARE_GPU sem_t* BatchedGemm<FpType>:: gpu_access = NULL; #endif extern "C" void get_current_time_with_ms (time_t *sec , long *millisec); Loading Loading @@ -37,6 +40,7 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, xy_patch_start_ = 0; #ifdef SHARE_GPU if (gpu_access == NULL){ gpu_access = sem_open("/dmrg_gpu", O_CREAT, 0666, 1); if(gpu_access == SEM_FAILED) { Loading @@ -50,6 +54,7 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, sem_wait(gpu_access); get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : acuired GPU\n", (intmax_t)sec, ms); #endif if (use_sparse) { nC_ = 0; Loading Loading @@ -132,11 +137,13 @@ BatchedGemm<FpType>::~BatchedGemm() { &Bbatch_ ); }; #ifdef SHARE_GPU sem_post(gpu_access); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : released GPU\n", (intmax_t)sec, ms); #endif } Loading src/apply_Htarget_vbatch.c +4 −4 Original line number Diff line number Diff line Loading @@ -30,10 +30,10 @@ void apply_Htarget_vbatch( const int ialign = 32; const double giga = 1000.0*1000.0*1000.0; double gflops1 = (FpType) 0.0; double gflops2 = (FpType) 0.0; double time_1st_vbatch = (FpType) 0.0; double time_2nd_vbatch = (FpType) 0.0; double gflops1 = (double) 0.0; double gflops2 = (double) 0.0; double time_1st_vbatch = (double) 0.0; double time_2nd_vbatch = (double) 0.0; size_t nbytes_BX = 0; Loading src/dmrg_magma.h +21 −0 Original line number Diff line number Diff line Loading @@ -10,6 +10,26 @@ #include "magma_types.h" #include "magma_v2.h" #include "magma_operators.h" #if defined(USE_COMPLEX_Z) #define MAGMA_FpType magmaDoubleComplex #elif defined(USE_COMPLEX_C) #define MAGMA_FpType magmaFloatComplex #elif defined(USE_FLOAT) #define MAGMA_FpType float #else #define MAGMA_FpType double #endif /* Loading Loading @@ -562,6 +582,7 @@ void magma_Xsetvector( 
magma_int_t n, #define magmablas_Xgemm_vbatched_max_nocheck magmablas_zgemm_vbatched_max_nocheck #define magmablas_Xgemm_vbatched_max magmablas_zgemm_vbatched_max #elif defined(USE_COMPLEX_C) #define magma_Xsetmatrix magma_csetmatrix Loading src/dmrg_malloc.c +11 −3 Original line number Diff line number Diff line #include <stdlib.h> #include <stdio.h> #include "dmrg_vbatch.h" #ifdef USE_MAGMA Loading @@ -6,16 +7,23 @@ #include "cuda_runtime.h" #endif #include "dmrg_lapack.h" int dmrg_is_managed( const void *ptr ) { const int false = (0 == 1); int is_managed = false; const int lfalse = (0 == 1); int is_managed = lfalse; #ifdef USE_MAGMA struct cudaPointerAttributes attribute; cudaError_t ierr = cudaPointerGetAttributes( &attribute, ptr ); #if defined(CUDART_VERSION) && (CUDART_VERSION >= 10000) is_managed = (ierr == cudaSuccess) && (attribute.type == cudaMemoryTypeManaged); #else is_managed = (ierr == cudaSuccess) && attribute.isManaged; #endif #endif return( is_managed ); } Loading Loading
README.md +60 −0 Original line number Diff line number Diff line

# DMRG++PluginSc

## How to build

Download and build PsimagLite

```BASH
$ git clone https://code.ornl.gov/gonzalo_3/PsimagLite
$ cd PsimagLite/lib
$ git checkout features
$ ./configure.pl
# to take advantage of 4 CPU cores
$ make -j 4
```

Download and build DMRG++PluginSc (this repo)

```BASH
$ git clone https://code.ornl.gov/e6d/dmrgppPluginSc
$ cd dmrgppPluginSc/src
# create a make.inc file by modifying one of the existing ones
# for example to take advantage of OpenMP on multiple CPU cores
# cp make.inc.openmp make.inc
# or take advantage of MAGMA library on GPU
# cp make.inc.magma make.inc
$ cp make.inc.openmp make.inc
$ make -j 4
```

Download DMRG++

```BASH
$ cd ../../
$ git clone https://code.ornl.gov/gonzalo_3/dmrgpp
$ cd dmrgpp/
$ git checkout features
$ cd src
```

Create a file myconfig.psiTag with the following content

```
flavor myflavor = (
< flavor production
< dependency PluginSc
)
default flavor = myflavor
```

and then

```BASH
$ ./configure.pl -c myconfig.psiTag
$ make -j 4
```

In your DMRG++ input files, add BatchedGemm to SolverOptions, for example:

```
SolverOptions=BatchedGemm,...
```
src/BatchedGemm.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -9,7 +9,10 @@ #include <math.h> #include <stdio.h> #ifdef SHARE_GPU sem_t* BatchedGemm<FpType>:: gpu_access = NULL; #endif extern "C" void get_current_time_with_ms (time_t *sec , long *millisec); Loading Loading @@ -37,6 +40,7 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, xy_patch_start_ = 0; #ifdef SHARE_GPU if (gpu_access == NULL){ gpu_access = sem_open("/dmrg_gpu", O_CREAT, 0666, 1); if(gpu_access == SEM_FAILED) { Loading @@ -50,6 +54,7 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, sem_wait(gpu_access); get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : acuired GPU\n", (intmax_t)sec, ms); #endif if (use_sparse) { nC_ = 0; Loading Loading @@ -132,11 +137,13 @@ BatchedGemm<FpType>::~BatchedGemm() { &Bbatch_ ); }; #ifdef SHARE_GPU sem_post(gpu_access); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : released GPU\n", (intmax_t)sec, ms); #endif } Loading
src/apply_Htarget_vbatch.c +4 −4 Original line number Diff line number Diff line Loading @@ -30,10 +30,10 @@ void apply_Htarget_vbatch( const int ialign = 32; const double giga = 1000.0*1000.0*1000.0; double gflops1 = (FpType) 0.0; double gflops2 = (FpType) 0.0; double time_1st_vbatch = (FpType) 0.0; double time_2nd_vbatch = (FpType) 0.0; double gflops1 = (double) 0.0; double gflops2 = (double) 0.0; double time_1st_vbatch = (double) 0.0; double time_2nd_vbatch = (double) 0.0; size_t nbytes_BX = 0; Loading
src/dmrg_magma.h +21 −0 Original line number Diff line number Diff line Loading @@ -10,6 +10,26 @@ #include "magma_types.h" #include "magma_v2.h" #include "magma_operators.h" #if defined(USE_COMPLEX_Z) #define MAGMA_FpType magmaDoubleComplex #elif defined(USE_COMPLEX_C) #define MAGMA_FpType magmaFloatComplex #elif defined(USE_FLOAT) #define MAGMA_FpType float #else #define MAGMA_FpType double #endif /* Loading Loading @@ -562,6 +582,7 @@ void magma_Xsetvector( magma_int_t n, #define magmablas_Xgemm_vbatched_max_nocheck magmablas_zgemm_vbatched_max_nocheck #define magmablas_Xgemm_vbatched_max magmablas_zgemm_vbatched_max #elif defined(USE_COMPLEX_C) #define magma_Xsetmatrix magma_csetmatrix Loading
src/dmrg_malloc.c +11 −3 Original line number Diff line number Diff line #include <stdlib.h> #include <stdio.h> #include "dmrg_vbatch.h" #ifdef USE_MAGMA Loading @@ -6,16 +7,23 @@ #include "cuda_runtime.h" #endif #include "dmrg_lapack.h" int dmrg_is_managed( const void *ptr ) { const int false = (0 == 1); int is_managed = false; const int lfalse = (0 == 1); int is_managed = lfalse; #ifdef USE_MAGMA struct cudaPointerAttributes attribute; cudaError_t ierr = cudaPointerGetAttributes( &attribute, ptr ); #if defined(CUDART_VERSION) && (CUDART_VERSION >= 10000) is_managed = (ierr == cudaSuccess) && (attribute.type == cudaMemoryTypeManaged); #else is_managed = (ierr == cudaSuccess) && attribute.isManaged; #endif #endif return( is_managed ); } Loading