Loading src/BatchedGemm.cpp +28 −1 Original line number Diff line number Diff line Loading @@ -3,6 +3,15 @@ #include "setup_sparse_batch.h" #include "BatchedGemm.h" //#include "timer.h" #define _POSIX_C_SOURCE 200809L #include <inttypes.h> #include <math.h> #include <stdio.h> sem_t* BatchedGemm<FpType>:: gpu_access = NULL; extern "C" void get_current_time_with_ms (time_t *sec , long *millisec); BatchedGemm<FpType>::BatchedGemm(int noperator, int npatches, Loading @@ -28,6 +37,20 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, xy_patch_start_ = 0; if (gpu_access == NULL){ gpu_access = sem_open("/dmrg_gpu", O_CREAT, 0666, 1); if(gpu_access == SEM_FAILED) { perror("Semaphore init"); } } time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : waiting for GPU\n", (intmax_t)sec, ms); sem_wait(gpu_access); get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : acuired GPU\n", (intmax_t)sec, ms); if (use_sparse) { nC_ = 0; gAbatch_ = 0; Loading Loading @@ -109,7 +132,11 @@ BatchedGemm<FpType>::~BatchedGemm() { &Bbatch_ ); }; sem_post(gpu_access); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : released GPU\n", (intmax_t)sec, ms); } Loading src/BatchedGemm.h +14 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,18 @@ #include "dmrg_types.h" #include "estimate_work.h" #include <sys/types.h> #include <sys/ipc.h> #include <sys/shm.h> #include <semaphore.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #include <errno.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> template<typename T> class BatchedGemm { Loading Loading @@ -55,6 +67,8 @@ private: int *ld_gAbatch_; int *ld_gBbatch_; static sem_t* gpu_access; BatchedGemm(const BatchedGemm<FpType>&); BatchedGemm& operator=(const BatchedGemm<FpType>&); Loading src/Makefile +2 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,8 @@ OBJS=\ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ estimate_work.o \ BatchedGemm.o BatchedGemm.o \ timer.o libdmrgppPluginSc.a: $(OBJS) ar cr libdmrgppPluginSc.a $(OBJS) Loading src/apply_Htarget_pvbatch.c +5 −6 Original line number Diff line number Diff line #include "dmrg_vbatch.h" #include "timer.h" #ifdef _OPENMP #include <omp.h> #endif Loading Loading @@ -367,18 +368,16 @@ void apply_Htarget_pvbatch( #ifdef _OPENMP time_2nd_vbatch += omp_get_wtime(); gflops2 = gflops2/(giga); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf("1st vbatch %f gflops (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %f gflops (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("apply_Htarget_pvbatch:memory BX (%f GBytes)\n", (double) nbytes_BX/(giga) ); printf(" %"PRIdMAX".%03ld : overall %f gflops memory BX (%f GBytes)\n", (intmax_t)s, ms, (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch), (double) nbytes_BX/(giga) ); #endif Loading src/apply_Htarget_sparse.c +7 −4 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ #ifdef _OPENMP #include <omp.h> #endif #include "timer.h" #ifndef MAX #define MAX(x,y) ( ((x) > (y)) ?(x):(y) ) Loading Loading @@ -489,12 +490,14 @@ void apply_Htarget_sparse( gflops2 = gflops2/giga; if (idebug >= 1) { time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %lf gflops/sec (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %lf gflops/sec\n", printf(" %"PRIdMAX".%03ld : overall %f gflops\n", (intmax_t)sec, ms, (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); }; Loading Loading
src/BatchedGemm.cpp +28 −1 Original line number Diff line number Diff line Loading @@ -3,6 +3,15 @@ #include "setup_sparse_batch.h" #include "BatchedGemm.h" //#include "timer.h" #define _POSIX_C_SOURCE 200809L #include <inttypes.h> #include <math.h> #include <stdio.h> sem_t* BatchedGemm<FpType>:: gpu_access = NULL; extern "C" void get_current_time_with_ms (time_t *sec , long *millisec); BatchedGemm<FpType>::BatchedGemm(int noperator, int npatches, Loading @@ -28,6 +37,20 @@ BatchedGemm<FpType>::BatchedGemm(int noperator, xy_patch_start_ = 0; if (gpu_access == NULL){ gpu_access = sem_open("/dmrg_gpu", O_CREAT, 0666, 1); if(gpu_access == SEM_FAILED) { perror("Semaphore init"); } } time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : waiting for GPU\n", (intmax_t)sec, ms); sem_wait(gpu_access); get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : acuired GPU\n", (intmax_t)sec, ms); if (use_sparse) { nC_ = 0; gAbatch_ = 0; Loading Loading @@ -109,7 +132,11 @@ BatchedGemm<FpType>::~BatchedGemm() { &Bbatch_ ); }; sem_post(gpu_access); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf(" %"PRIdMAX".%03ld : released GPU\n", (intmax_t)sec, ms); } Loading
src/BatchedGemm.h +14 −0 Original line number Diff line number Diff line Loading @@ -4,6 +4,18 @@ #include "dmrg_types.h" #include "estimate_work.h" #include <sys/types.h> #include <sys/ipc.h> #include <sys/shm.h> #include <semaphore.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #include <errno.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> template<typename T> class BatchedGemm { Loading Loading @@ -55,6 +67,8 @@ private: int *ld_gAbatch_; int *ld_gBbatch_; static sem_t* gpu_access; BatchedGemm(const BatchedGemm<FpType>&); BatchedGemm& operator=(const BatchedGemm<FpType>&); Loading
src/Makefile +2 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,8 @@ OBJS=\ unsetup_sparse_batch.o \ apply_Htarget_sparse.o \ estimate_work.o \ BatchedGemm.o BatchedGemm.o \ timer.o libdmrgppPluginSc.a: $(OBJS) ar cr libdmrgppPluginSc.a $(OBJS) Loading
src/apply_Htarget_pvbatch.c +5 −6 Original line number Diff line number Diff line #include "dmrg_vbatch.h" #include "timer.h" #ifdef _OPENMP #include <omp.h> #endif Loading Loading @@ -367,18 +368,16 @@ void apply_Htarget_pvbatch( #ifdef _OPENMP time_2nd_vbatch += omp_get_wtime(); gflops2 = gflops2/(giga); time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf("1st vbatch %f gflops (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %f gflops (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %f gflops\n", (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); printf("apply_Htarget_pvbatch:memory BX (%f GBytes)\n", (double) nbytes_BX/(giga) ); printf(" %"PRIdMAX".%03ld : overall %f gflops memory BX (%f GBytes)\n", (intmax_t)s, ms, (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch), (double) nbytes_BX/(giga) ); #endif Loading
src/apply_Htarget_sparse.c +7 −4 Original line number Diff line number Diff line Loading @@ -5,6 +5,7 @@ #ifdef _OPENMP #include <omp.h> #endif #include "timer.h" #ifndef MAX #define MAX(x,y) ( ((x) > (y)) ?(x):(y) ) Loading Loading @@ -489,12 +490,14 @@ void apply_Htarget_sparse( gflops2 = gflops2/giga; if (idebug >= 1) { time_t sec; long ms; get_current_time_with_ms (&sec , &ms); printf("1st vbatch %lf gflops/sec (gflops1=%lf,time=%lf)\n", gflops1/time_1st_vbatch, gflops1, time_1st_vbatch ); printf("2nd vbatch %lf gflops/sec (gflops2=%lf,time=%lf)\n", gflops2/time_2nd_vbatch, gflops2, time_2nd_vbatch ); printf("overall %lf gflops/sec\n", printf(" %"PRIdMAX".%03ld : overall %f gflops\n", (intmax_t)sec, ms, (gflops1+gflops2)/(time_1st_vbatch + time_2nd_vbatch) ); }; Loading