Loading src/exatn/tests/NumServerTester.cpp +69 −7 Original line number Diff line number Diff line Loading @@ -18,14 +18,15 @@ #include "errors.hpp" //Test activation: /*#define EXATN_TEST0 /* #define EXATN_TEST0 #define EXATN_TEST1 #define EXATN_TEST2 #define EXATN_TEST3 #define EXATN_TEST4 #define EXATN_TEST5*/ #define EXATN_TEST5 #define EXATN_TEST6 /*#define EXATN_TEST7 #define EXATN_TEST7 #define EXATN_TEST8 #define EXATN_TEST9 #define EXATN_TEST10 Loading @@ -44,13 +45,15 @@ #define EXATN_TEST23 #define EXATN_TEST24 #define EXATN_TEST25 #define EXATN_TEST26*/ #define EXATN_TEST26 //#define EXATN_TEST27 //requires input file from source //#define EXATN_TEST28 //requires input file from source //#define EXATN_TEST29 //#define EXATN_TEST30 #define EXATN_TEST29 #define EXATN_TEST30 //#define EXATN_TEST31 //requires input file from source //#define EXATN_TEST32 */ #define EXATN_TEST32 //#define EXATN_TEST33 #ifdef EXATN_TEST0 Loading Loading @@ -3774,6 +3777,65 @@ TEST(NumServerTester, ExcitedMCVQE) { #endif #ifdef EXATN_TEST32 TEST(NumServerTester, CuTensorNet) { using exatn::TensorShape; using exatn::TensorSignature; using exatn::Tensor; using exatn::TensorNetwork; using exatn::TensorExpansion; using exatn::TensorOperator; using exatn::TensorElementType; using exatn::TensorRange; const auto TENS_ELEM_TYPE = TensorElementType::REAL32; const int NUM_REPEATS = 3; //exatn::resetLoggingLevel(1,2); //debug bool success = true; //Create tensors: success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{96,64,64,96}); assert(success); success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{96,64,64}); assert(success); success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{64,96,64}); assert(success); success = exatn::createTensor("D",TENS_ELEM_TYPE,TensorShape{96,64,96,64}); assert(success); //Init tensors: success = exatn::initTensorRnd("A"); assert(success); success = exatn::initTensorRnd("B"); assert(success); success = exatn::initTensorRnd("C"); assert(success); success = exatn::initTensor("D",0.0); assert(success); //Contract tensor network: int num_repeats = NUM_REPEATS; while(--num_repeats >= 0){ success = exatn::sync(); assert(success); std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): "; auto flops = exatn::getTotalFlopCount(); auto time_start = exatn::Timer::timeInSecHR(); success = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success); auto duration = exatn::Timer::timeInSecHR(time_start); flops = exatn::getTotalFlopCount() - flops; std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl; } //Destroy tensors: success = exatn::sync(); assert(success); success = exatn::destroyTensor("D"); assert(success); success = exatn::destroyTensor("C"); assert(success); success = exatn::destroyTensor("B"); assert(success); success = exatn::destroyTensor("A"); assert(success); //Synchronize: success = exatn::sync(); assert(success); exatn::resetLoggingLevel(0,0); //Grab a beer! } #endif #ifdef EXATN_TEST33 TEST(NumServerTester, TensorComposite) { using exatn::TensorShape; using exatn::TensorSignature; Loading src/runtime/executor/cuquantum/cuquantum_executor.cu +7 −4 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -108,8 +108,11 @@ struct TensorNetworkReq { }; CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth): tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth) CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth, unsigned int process_rank, unsigned int num_processes): tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth), process_rank_(process_rank), num_processes_(num_processes) { static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)"); Loading Loading @@ -452,7 +455,7 @@ void CuQuantumExecutor::contractTensorNetwork(std::shared_ptr<TensorNetworkReq> &num_slices,sizeof(num_slices))); assert(num_slices > 0); HANDLE_CUDA_ERROR(cudaEventRecord(tn_req->compute_start,tn_req->stream)); for(int64_t slice_id = 0; slice_id < num_slices; ++slice_id){ for(int64_t slice_id = process_rank_; slice_id < num_slices; slice_id += num_processes_){ HANDLE_CTN_ERROR(cutensornetContraction(gpu_attr_[gpu].second.cutn_handle, tn_req->comp_plan, tn_req->data_in,tn_req->data_out, Loading src/runtime/executor/cuquantum/cuquantum_executor.hpp +8 −2 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -41,7 +41,9 @@ class CuQuantumExecutor { public: CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth); unsigned int pipeline_depth, unsigned int process_rank, unsigned int num_processes); CuQuantumExecutor(const CuQuantumExecutor &) = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; Loading Loading @@ -98,6 +100,10 @@ protected: TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes} /** Pipeline depth **/ const unsigned int pipe_depth_; /** Process rank **/ const unsigned int process_rank_; /** Total number of parallel processes **/ const unsigned int num_processes_; }; } //namespace runtime Loading src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp +6 −3 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -25,10 +25,11 @@ namespace runtime { void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, const ParamConf & parameters, unsigned int num_processes, unsigned int process_rank, unsigned int global_process_rank) { TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank); TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,num_processes,process_rank,global_process_rank); #ifdef CUQUANTUM if(node_executor){ cuquantum_executor_ = std::make_shared<CuQuantumExecutor>( Loading @@ -36,7 +37,9 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size); return data_ptr; }, CUQUANTUM_PIPELINE_DEPTH CUQUANTUM_PIPELINE_DEPTH, process_rank, num_processes ); } #endif Loading src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp +2 −1 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -45,6 +45,7 @@ public: /** Sets/resets the DAG node executor (tensor operation executor). **/ virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, const ParamConf & parameters, unsigned int num_processes, unsigned int process_rank, unsigned int global_process_rank) override; Loading Loading
src/exatn/tests/NumServerTester.cpp +69 −7 Original line number Diff line number Diff line Loading @@ -18,14 +18,15 @@ #include "errors.hpp" //Test activation: /*#define EXATN_TEST0 /* #define EXATN_TEST0 #define EXATN_TEST1 #define EXATN_TEST2 #define EXATN_TEST3 #define EXATN_TEST4 #define EXATN_TEST5*/ #define EXATN_TEST5 #define EXATN_TEST6 /*#define EXATN_TEST7 #define EXATN_TEST7 #define EXATN_TEST8 #define EXATN_TEST9 #define EXATN_TEST10 Loading @@ -44,13 +45,15 @@ #define EXATN_TEST23 #define EXATN_TEST24 #define EXATN_TEST25 #define EXATN_TEST26*/ #define EXATN_TEST26 //#define EXATN_TEST27 //requires input file from source //#define EXATN_TEST28 //requires input file from source //#define EXATN_TEST29 //#define EXATN_TEST30 #define EXATN_TEST29 #define EXATN_TEST30 //#define EXATN_TEST31 //requires input file from source //#define EXATN_TEST32 */ #define EXATN_TEST32 //#define EXATN_TEST33 #ifdef EXATN_TEST0 Loading Loading @@ -3774,6 +3777,65 @@ TEST(NumServerTester, ExcitedMCVQE) { #endif #ifdef EXATN_TEST32 TEST(NumServerTester, CuTensorNet) { using exatn::TensorShape; using exatn::TensorSignature; using exatn::Tensor; using exatn::TensorNetwork; using exatn::TensorExpansion; using exatn::TensorOperator; using exatn::TensorElementType; using exatn::TensorRange; const auto TENS_ELEM_TYPE = TensorElementType::REAL32; const int NUM_REPEATS = 3; //exatn::resetLoggingLevel(1,2); //debug bool success = true; //Create tensors: success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{96,64,64,96}); assert(success); success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{96,64,64}); assert(success); success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{64,96,64}); assert(success); success = exatn::createTensor("D",TENS_ELEM_TYPE,TensorShape{96,64,96,64}); assert(success); //Init tensors: success = exatn::initTensorRnd("A"); assert(success); success = exatn::initTensorRnd("B"); assert(success); success = exatn::initTensorRnd("C"); assert(success); success = exatn::initTensor("D",0.0); assert(success); //Contract tensor network: int num_repeats = NUM_REPEATS; while(--num_repeats >= 0){ success = exatn::sync(); assert(success); std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): "; auto flops = exatn::getTotalFlopCount(); auto time_start = exatn::Timer::timeInSecHR(); success = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success); auto duration = exatn::Timer::timeInSecHR(time_start); flops = exatn::getTotalFlopCount() - flops; std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl; } //Destroy tensors: success = exatn::sync(); assert(success); success = exatn::destroyTensor("D"); assert(success); success = exatn::destroyTensor("C"); assert(success); success = exatn::destroyTensor("B"); assert(success); success = exatn::destroyTensor("A"); assert(success); //Synchronize: success = exatn::sync(); assert(success); exatn::resetLoggingLevel(0,0); //Grab a beer! } #endif #ifdef EXATN_TEST33 TEST(NumServerTester, TensorComposite) { using exatn::TensorShape; using exatn::TensorSignature; Loading
src/runtime/executor/cuquantum/cuquantum_executor.cu +7 −4 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -108,8 +108,11 @@ struct TensorNetworkReq { }; CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth): tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth) CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth, unsigned int process_rank, unsigned int num_processes): tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth), process_rank_(process_rank), num_processes_(num_processes) { static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)"); Loading Loading @@ -452,7 +455,7 @@ void CuQuantumExecutor::contractTensorNetwork(std::shared_ptr<TensorNetworkReq> &num_slices,sizeof(num_slices))); assert(num_slices > 0); HANDLE_CUDA_ERROR(cudaEventRecord(tn_req->compute_start,tn_req->stream)); for(int64_t slice_id = 0; slice_id < num_slices; ++slice_id){ for(int64_t slice_id = process_rank_; slice_id < num_slices; slice_id += num_processes_){ HANDLE_CTN_ERROR(cutensornetContraction(gpu_attr_[gpu].second.cutn_handle, tn_req->comp_plan, tn_req->data_in,tn_req->data_out, Loading
src/runtime/executor/cuquantum/cuquantum_executor.hpp +8 −2 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -41,7 +41,9 @@ class CuQuantumExecutor { public: CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth); unsigned int pipeline_depth, unsigned int process_rank, unsigned int num_processes); CuQuantumExecutor(const CuQuantumExecutor &) = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; Loading Loading @@ -98,6 +100,10 @@ protected: TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes} /** Pipeline depth **/ const unsigned int pipe_depth_; /** Process rank **/ const unsigned int process_rank_; /** Total number of parallel processes **/ const unsigned int num_processes_; }; } //namespace runtime Loading
src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp +6 −3 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -25,10 +25,11 @@ namespace runtime { void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, const ParamConf & parameters, unsigned int num_processes, unsigned int process_rank, unsigned int global_process_rank) { TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank); TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,num_processes,process_rank,global_process_rank); #ifdef CUQUANTUM if(node_executor){ cuquantum_executor_ = std::make_shared<CuQuantumExecutor>( Loading @@ -36,7 +37,9 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size); return data_ptr; }, CUQUANTUM_PIPELINE_DEPTH CUQUANTUM_PIPELINE_DEPTH, process_rank, num_processes ); } #endif Loading
src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp +2 −1 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2022/01/05 REVISION: 2022/01/06 Copyright (C) 2018-2022 Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -45,6 +45,7 @@ public: /** Sets/resets the DAG node executor (tensor operation executor). **/ virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, const ParamConf & parameters, unsigned int num_processes, unsigned int process_rank, unsigned int global_process_rank) override; Loading