Loading development.txt +5 −5 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ ISSUES: That is, the order of tensor operations across all participating processes must be consistent such that every encountered global tensor operation will receive the same tensor operand irrespective of the difference in the locally generated tensor name. Special of the difference in the locally generated tensor names. Special care needs to be taken in iterating over associative tensor containers, to ensure that the keys are consistent across all participating processes. For example, automatically generated tensor names Loading @@ -21,9 +21,7 @@ ISSUES: BUGS: - 32-bit integer MPI message chunking issue in the backend. - Fix the bug(s) in the tensor order reduction mechanism in the TalshExecutor backend. - Fix the bug(s) in the tensor order reduction mechanism in the TalshNodeExecutor backend. FEATURES: Loading @@ -39,11 +37,13 @@ FEATURES: Contract replaced tensors, then replace the contracted tensor with a new tensor (sub)network. - Implement the Renormalization procedure. - Implement SAVE/LOAD API for TensorExpansion. - Implement TensorNetwork slice computing Generator. - Implement b-D procedure. - Implement bl-D procedure. - Implement conjugate gradient optimization procedure. Loading src/exatn/exatn_numerics.hpp +9 −3 Original line number Diff line number Diff line /** ExaTN::Numerics: General client header (free function API) REVISION: 2021/10/30 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ /** Rationale: 1. 
Vector space and subspace registration [spaces.hpp, space_register.hpp]: Loading Loading @@ -1086,6 +1086,12 @@ inline std::shared_ptr<exatn::TensorNetwork> makeTensorNetwork(const std::string // INTERNAL CONTROL API // ////////////////////////// /** Switches the computational backend: {"default","cuquantum"}. Only applies to tensor network execution. **/ inline void switchComputationalBackend(const std::string & backend_name) {return numericalServer->switchComputationalBackend(backend_name);} /** Resets the tensor contraction sequence optimizer that is invoked when evaluating tensor networks: {dummy,heuro,greed,metis}. **/ inline void resetContrSeqOptimizer(const std::string & optimizer_name) Loading src/exatn/num_server.cpp +64 −7 Original line number Diff line number Diff line /** ExaTN::Numerics: Numerical server REVISION: 2021/12/10 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. 
Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ #include "num_server.hpp" #include "tensor_range.hpp" Loading Loading @@ -89,7 +89,8 @@ NumServer::NumServer(const MPICommProxy & communicator, const ParamConf & parameters, const std::string & graph_executor_name, const std::string & node_executor_name): contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), intra_comm_(communicator), validation_tracing_(false) contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), comp_backend_("default"), intra_comm_(communicator), validation_tracing_(false) { int mpi_error = MPI_Comm_size(*(communicator.get<MPI_Comm>()),&num_processes_); assert(mpi_error == MPI_SUCCESS); mpi_error = MPI_Comm_rank(*(communicator.get<MPI_Comm>()),&process_rank_); assert(mpi_error == MPI_SUCCESS); Loading Loading @@ -117,7 +118,8 @@ NumServer::NumServer(const MPICommProxy & communicator, NumServer::NumServer(const ParamConf & parameters, const std::string & graph_executor_name, const std::string & node_executor_name): contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), validation_tracing_(false) contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), comp_backend_("default"), validation_tracing_(false) { num_processes_ = 1; process_rank_ = 0; global_process_rank_ = 0; process_world_ = std::make_shared<ProcessGroup>(intra_comm_,num_processes_); //intra-communicator is empty here Loading Loading @@ -194,6 +196,22 @@ void NumServer::reconfigureTensorRuntime(const ParamConf & parameters, } #endif void NumServer::switchComputationalBackend(const std::string & backend_name) { bool success = sync(); assert(success); if(backend_name == "default"){ comp_backend_ = backend_name; #ifdef CUQUANTUM }else if(backend_name == "cuquantum"){ comp_backend_ = backend_name; #endif }else{ std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: " << backend_name << std::endl; std::abort(); } 
return; } void NumServer::resetContrSeqOptimizer(const std::string & optimizer_name, bool caching) { contr_seq_optimizer_ = optimizer_name; Loading Loading @@ -612,7 +630,7 @@ bool NumServer::submit(const ProcessGroup & process_group, //Determine parallel execution configuration: unsigned int local_rank; //local process rank within the process group if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing assert(network.isValid()); //debug //assert(network.isValid()); //debug unsigned int num_procs = process_group.getSize(); //number of executing processes assert(local_rank < num_procs); if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) Loading Loading @@ -883,6 +901,30 @@ bool NumServer::submit(const ProcessGroup & process_group, bool NumServer::submit(const ProcessGroup & process_group, std::shared_ptr<TensorNetwork> network) { #ifdef CUQUANTUM //Try execution via an alternative computational backend: if(comp_backend_ == "cuquantum"){ //Determine parallel execution configuration: unsigned int local_rank; //local process rank within the process group if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing //assert(network->isValid()); //debug unsigned int num_procs = process_group.getSize(); //number of executing processes assert(local_rank < num_procs); if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) << "]: Submitting tensor network <" << network->getName() << "> (" << network->getTensor(0)->getName() << ") for execution via cuQuantum by " << num_procs << " processes with memory limit " << process_group.getMemoryLimitPerProcess() << " bytes" << std::endl << std::flush; if(logging_ > 0) network->printItFile(logfile_); const auto exec_handle = 
tensor_rt_->submit(network,process_group.getMPICommProxy(),num_procs,local_rank); bool success = (exec_handle != 0); if(success){ auto res = tn_exec_handles_.emplace(std::make_pair(network->getTensor(0)->getTensorHash(),exec_handle)); success = res.second; if(success && logging_ > 0) logfile_ << "Number of submitted networks via cuQuantum = 1" << std::endl << std::flush; } return success; } #endif if(network) return submit(process_group,*network); return false; } Loading Loading @@ -1030,6 +1072,14 @@ bool NumServer::sync(const ProcessGroup & process_group, const Tensor & tensor, { bool success = true; if(!process_group.rankIsIn(process_rank_)) return success; //process is not in the group: Do nothing #ifdef CUQUANTUM if(comp_backend_ == "cuquantum"){ auto iter = tn_exec_handles_.find(tensor.getTensorHash()); bool synced = (iter == tn_exec_handles_.end()); if(!synced) synced = tensor_rt_->syncNetwork(iter->second,wait); return synced; } #endif auto iter = tensors_.find(tensor.getName()); if(iter != tensors_.end()){ if(iter->second->isComposite()){ Loading Loading @@ -1081,7 +1131,11 @@ bool NumServer::sync(const ProcessGroup & process_group, TensorNetwork & network bool NumServer::sync(bool wait) { return sync(getCurrentProcessGroup(),wait); bool success = sync(getCurrentProcessGroup(),wait); #ifdef CUQUANTUM if(comp_backend_ == "cuquantum" && success) tn_exec_handles_.clear(); #endif return success; } bool NumServer::sync(const ProcessGroup & process_group, bool wait) Loading @@ -1092,6 +1146,9 @@ bool NumServer::sync(const ProcessGroup & process_group, bool wait) if(success){ if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) << "]: Locally synchronized all operations" << std::endl << std::flush; #ifdef CUQUANTUM if(comp_backend_ == "cuquantum") tn_exec_handles_.clear(); #endif #ifdef MPI_ENABLED if(wait){ auto errc = MPI_Barrier(process_group.getMPICommProxy().getRef<MPI_Comm>()); Loading 
src/exatn/num_server.hpp +19 −3 Original line number Diff line number Diff line /** ExaTN::Numerics: Numerical server REVISION: 2021/12/22 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ /** Rationale: (a) Numerical server provides basic tensor network processing functionality: Loading Loading @@ -270,6 +270,9 @@ public: const std::string & node_executor_name); #endif /** Switches the computational backend. **/ void switchComputationalBackend(const std::string & backend_name); /** Resets the tensor contraction sequence optimizer that is invoked when evaluating tensor networks. **/ void resetContrSeqOptimizer(const std::string & optimizer_name, //in: tensor contraction sequence optimizer name Loading Loading @@ -1032,25 +1035,38 @@ protected: private: //Spaces: std::shared_ptr<numerics::SpaceRegister> space_register_; //register of vector spaces and their named subspaces std::unordered_map<std::string,SpaceId> subname2id_; //maps a subspace name to its parental vector space id //Tensors: std::unordered_map<std::string,std::shared_ptr<Tensor>> tensors_; //registered tensors (by CREATE operation) std::map<std::string,std::shared_ptr<Tensor>> implicit_tensors_; //tensors created implicitly by the runtime (for garbage collection) std::unordered_map<std::string,ProcessGroup> tensor_comms_; //process group associated with each tensor #ifdef CUQUANTUM //Tensor network execution handles: std::unordered_map<numerics::TensorHashType,runtime::TensorOpExecHandle> tn_exec_handles_; #endif //Contraction path optimizer: std::string contr_seq_optimizer_; //tensor contraction sequence optimizer invoked when evaluating tensor networks bool contr_seq_caching_; //regulates whether or not to cache pseudo-optimal tensor contraction orders for later reuse //Registered external 
methods and data: std::map<std::string,std::shared_ptr<TensorMethod>> ext_methods_; //external tensor methods std::map<std::string,std::shared_ptr<BytePacket>> ext_data_; //external data //Program scopes: std::stack<std::pair<std::string,ScopeId>> scopes_; //TAProL scope stack: {Scope name, Scope Id} //Tensor operation factory: TensorOpFactory * tensor_op_factory_; //tensor operation factory (non-owning pointer) //Configuration: int logging_; //logging level std::ofstream logfile_; //log file std::string comp_backend_; //current computational backend int num_processes_; //total number of parallel processes in the dedicated MPI communicator int process_rank_; //rank of the current parallel process in the dedicated MPI communicator int global_process_rank_; //rank of the current parallel process in MPI_COMM_WORLD Loading src/exatn/tests/NumServerTester.cpp +4 −1 Original line number Diff line number Diff line Loading @@ -3807,6 +3807,8 @@ TEST(NumServerTester, CuTensorNet) { success = exatn::initTensorRnd("C"); assert(success); success = exatn::initTensor("D",0.0); assert(success); exatn::switchComputationalBackend("default"); //Contract tensor network: int num_repeats = NUM_REPEATS; while(--num_repeats >= 0){ Loading @@ -3814,8 +3816,9 @@ TEST(NumServerTester, CuTensorNet) { std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): "; auto flops = exatn::getTotalFlopCount(); auto time_start = exatn::Timer::timeInSecHR(); success = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success); success = exatn::sync("D"); assert(success); auto duration = exatn::Timer::timeInSecHR(time_start); flops = exatn::getTotalFlopCount() - flops; std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl; Loading Loading
development.txt +5 −5 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ ISSUES: That is, the order of tensor operations across all participating processes must be consistent such that every encountered global tensor operation will receive the same tensor operand irrespective of the difference in the locally generated tensor name. Special of the difference in the locally generated tensor names. Special care needs to be taken in iterating over associative tensor containers, to ensure that the keys are consistent across all participating processes. For example, automatically generated tensor names Loading @@ -21,9 +21,7 @@ ISSUES: BUGS: - 32-bit integer MPI message chunking issue in the backend. - Fix the bug(s) in the tensor order reduction mechanism in the TalshExecutor backend. - Fix the bug(s) in the tensor order reduction mechanism in the TalshNodeExecutor backend. FEATURES: Loading @@ -39,11 +37,13 @@ FEATURES: Contract replaced tensors, then replace the contracted tensor with a new tensor (sub)network. - Implement the Renormalization procedure. - Implement SAVE/LOAD API for TensorExpansion. - Implement TensorNetwork slice computing Generator. - Implement b-D procedure. - Implement bl-D procedure. - Implement conjugate gradient optimization procedure. Loading
src/exatn/exatn_numerics.hpp +9 −3 Original line number Diff line number Diff line /** ExaTN::Numerics: General client header (free function API) REVISION: 2021/10/30 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ /** Rationale: 1. Vector space and subspace registration [spaces.hpp, space_register.hpp]: Loading Loading @@ -1086,6 +1086,12 @@ inline std::shared_ptr<exatn::TensorNetwork> makeTensorNetwork(const std::string // INTERNAL CONTROL API // ////////////////////////// /** Switches the computational backend: {"default","cuquantum"}. Only applies to tensor network execution. **/ inline void switchComputationalBackend(const std::string & backend_name) {return numericalServer->switchComputationalBackend(backend_name);} /** Resets the tensor contraction sequence optimizer that is invoked when evaluating tensor networks: {dummy,heuro,greed,metis}. **/ inline void resetContrSeqOptimizer(const std::string & optimizer_name) Loading
src/exatn/num_server.cpp +64 −7 Original line number Diff line number Diff line /** ExaTN::Numerics: Numerical server REVISION: 2021/12/10 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ #include "num_server.hpp" #include "tensor_range.hpp" Loading Loading @@ -89,7 +89,8 @@ NumServer::NumServer(const MPICommProxy & communicator, const ParamConf & parameters, const std::string & graph_executor_name, const std::string & node_executor_name): contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), intra_comm_(communicator), validation_tracing_(false) contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), comp_backend_("default"), intra_comm_(communicator), validation_tracing_(false) { int mpi_error = MPI_Comm_size(*(communicator.get<MPI_Comm>()),&num_processes_); assert(mpi_error == MPI_SUCCESS); mpi_error = MPI_Comm_rank(*(communicator.get<MPI_Comm>()),&process_rank_); assert(mpi_error == MPI_SUCCESS); Loading Loading @@ -117,7 +118,8 @@ NumServer::NumServer(const MPICommProxy & communicator, NumServer::NumServer(const ParamConf & parameters, const std::string & graph_executor_name, const std::string & node_executor_name): contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), validation_tracing_(false) contr_seq_optimizer_("metis"), contr_seq_caching_(false), logging_(0), comp_backend_("default"), validation_tracing_(false) { num_processes_ = 1; process_rank_ = 0; global_process_rank_ = 0; process_world_ = std::make_shared<ProcessGroup>(intra_comm_,num_processes_); //intra-communicator is empty here Loading Loading @@ -194,6 +196,22 @@ void NumServer::reconfigureTensorRuntime(const ParamConf & parameters, } #endif void NumServer::switchComputationalBackend(const std::string & backend_name) { bool success = 
sync(); assert(success); if(backend_name == "default"){ comp_backend_ = backend_name; #ifdef CUQUANTUM }else if(backend_name == "cuquantum"){ comp_backend_ = backend_name; #endif }else{ std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: " << backend_name << std::endl; std::abort(); } return; } void NumServer::resetContrSeqOptimizer(const std::string & optimizer_name, bool caching) { contr_seq_optimizer_ = optimizer_name; Loading Loading @@ -612,7 +630,7 @@ bool NumServer::submit(const ProcessGroup & process_group, //Determine parallel execution configuration: unsigned int local_rank; //local process rank within the process group if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing assert(network.isValid()); //debug //assert(network.isValid()); //debug unsigned int num_procs = process_group.getSize(); //number of executing processes assert(local_rank < num_procs); if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) Loading Loading @@ -883,6 +901,30 @@ bool NumServer::submit(const ProcessGroup & process_group, bool NumServer::submit(const ProcessGroup & process_group, std::shared_ptr<TensorNetwork> network) { #ifdef CUQUANTUM //Try execution via an alternative computational backend: if(comp_backend_ == "cuquantum"){ //Determine parallel execution configuration: unsigned int local_rank; //local process rank within the process group if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing //assert(network->isValid()); //debug unsigned int num_procs = process_group.getSize(); //number of executing processes assert(local_rank < num_procs); if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) << "]: Submitting tensor network <" << network->getName() << "> (" << network->getTensor(0)->getName() << 
") for execution via cuQuantum by " << num_procs << " processes with memory limit " << process_group.getMemoryLimitPerProcess() << " bytes" << std::endl << std::flush; if(logging_ > 0) network->printItFile(logfile_); const auto exec_handle = tensor_rt_->submit(network,process_group.getMPICommProxy(),num_procs,local_rank); bool success = (exec_handle != 0); if(success){ auto res = tn_exec_handles_.emplace(std::make_pair(network->getTensor(0)->getTensorHash(),exec_handle)); success = res.second; if(success && logging_ > 0) logfile_ << "Number of submitted networks via cuQuantum = 1" << std::endl << std::flush; } return success; } #endif if(network) return submit(process_group,*network); return false; } Loading Loading @@ -1030,6 +1072,14 @@ bool NumServer::sync(const ProcessGroup & process_group, const Tensor & tensor, { bool success = true; if(!process_group.rankIsIn(process_rank_)) return success; //process is not in the group: Do nothing #ifdef CUQUANTUM if(comp_backend_ == "cuquantum"){ auto iter = tn_exec_handles_.find(tensor.getTensorHash()); bool synced = (iter == tn_exec_handles_.end()); if(!synced) synced = tensor_rt_->syncNetwork(iter->second,wait); return synced; } #endif auto iter = tensors_.find(tensor.getName()); if(iter != tensors_.end()){ if(iter->second->isComposite()){ Loading Loading @@ -1081,7 +1131,11 @@ bool NumServer::sync(const ProcessGroup & process_group, TensorNetwork & network bool NumServer::sync(bool wait) { return sync(getCurrentProcessGroup(),wait); bool success = sync(getCurrentProcessGroup(),wait); #ifdef CUQUANTUM if(comp_backend_ == "cuquantum" && success) tn_exec_handles_.clear(); #endif return success; } bool NumServer::sync(const ProcessGroup & process_group, bool wait) Loading @@ -1092,6 +1146,9 @@ bool NumServer::sync(const ProcessGroup & process_group, bool wait) if(success){ if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart()) << "]: Locally synchronized all 
operations" << std::endl << std::flush; #ifdef CUQUANTUM if(comp_backend_ == "cuquantum") tn_exec_handles_.clear(); #endif #ifdef MPI_ENABLED if(wait){ auto errc = MPI_Barrier(process_group.getMPICommProxy().getRef<MPI_Comm>()); Loading
src/exatn/num_server.hpp +19 −3 Original line number Diff line number Diff line /** ExaTN::Numerics: Numerical server REVISION: 2021/12/22 REVISION: 2022/01/07 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/ /** Rationale: (a) Numerical server provides basic tensor network processing functionality: Loading Loading @@ -270,6 +270,9 @@ public: const std::string & node_executor_name); #endif /** Switches the computational backend. **/ void switchComputationalBackend(const std::string & backend_name); /** Resets the tensor contraction sequence optimizer that is invoked when evaluating tensor networks. **/ void resetContrSeqOptimizer(const std::string & optimizer_name, //in: tensor contraction sequence optimizer name Loading Loading @@ -1032,25 +1035,38 @@ protected: private: //Spaces: std::shared_ptr<numerics::SpaceRegister> space_register_; //register of vector spaces and their named subspaces std::unordered_map<std::string,SpaceId> subname2id_; //maps a subspace name to its parental vector space id //Tensors: std::unordered_map<std::string,std::shared_ptr<Tensor>> tensors_; //registered tensors (by CREATE operation) std::map<std::string,std::shared_ptr<Tensor>> implicit_tensors_; //tensors created implicitly by the runtime (for garbage collection) std::unordered_map<std::string,ProcessGroup> tensor_comms_; //process group associated with each tensor #ifdef CUQUANTUM //Tensor network execution handles: std::unordered_map<numerics::TensorHashType,runtime::TensorOpExecHandle> tn_exec_handles_; #endif //Contraction path optimizer: std::string contr_seq_optimizer_; //tensor contraction sequence optimizer invoked when evaluating tensor networks bool contr_seq_caching_; //regulates whether or not to cache pseudo-optimal tensor contraction orders for later reuse //Registered external 
methods and data: std::map<std::string,std::shared_ptr<TensorMethod>> ext_methods_; //external tensor methods std::map<std::string,std::shared_ptr<BytePacket>> ext_data_; //external data //Program scopes: std::stack<std::pair<std::string,ScopeId>> scopes_; //TAProL scope stack: {Scope name, Scope Id} //Tensor operation factory: TensorOpFactory * tensor_op_factory_; //tensor operation factory (non-owning pointer) //Configuration: int logging_; //logging level std::ofstream logfile_; //log file std::string comp_backend_; //current computational backend int num_processes_; //total number of parallel processes in the dedicated MPI communicator int process_rank_; //rank of the current parallel process in the dedicated MPI communicator int global_process_rank_; //rank of the current parallel process in MPI_COMM_WORLD Loading
src/exatn/tests/NumServerTester.cpp +4 −1 Original line number Diff line number Diff line Loading @@ -3807,6 +3807,8 @@ TEST(NumServerTester, CuTensorNet) { success = exatn::initTensorRnd("C"); assert(success); success = exatn::initTensor("D",0.0); assert(success); exatn::switchComputationalBackend("default"); //Contract tensor network: int num_repeats = NUM_REPEATS; while(--num_repeats >= 0){ Loading @@ -3814,8 +3816,9 @@ TEST(NumServerTester, CuTensorNet) { std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): "; auto flops = exatn::getTotalFlopCount(); auto time_start = exatn::Timer::timeInSecHR(); success = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success); success = exatn::sync("D"); assert(success); auto duration = exatn::Timer::timeInSecHR(time_start); flops = exatn::getTotalFlopCount() - flops; std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl; Loading