Loading src/runtime/executor/cuquantum/cuquantum_executor.cu +18 −15 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -16,6 +16,7 @@ Rationale: #include <vector> #include <unordered_map> #include <type_traits> #include <iostream> Loading Loading @@ -57,43 +58,45 @@ struct TensorNetworkReq { }; CuQuantumExecutor::CuQuantumExecutor() CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func): tensor_data_access_func_(std::move(tensor_data_access_func)) { static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)"); const size_t version = cutensornetGetVersion(); std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): cuTensorNet backend version " << version << std::endl; int num_gpus = 0; auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS); for(int i = 0; i < num_gpus; ++i){ if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus.emplace_back(i); if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus_.emplace_back(i); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus.size() << std::endl; std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus_.size() << std::endl; ctn_handles.resize(gpus.size()); for(const auto & gpu_id: gpus){ ctn_handles_.resize(gpus_.size()); for(const auto & gpu_id: gpus_){ HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id)); HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[gpu_id]))); HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles_[gpu_id]))); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl; } CuQuantumExecutor::~CuQuantumExecutor() { bool success = sync(); assert(success); for(const auto & gpu_id: gpus){ for(const auto & gpu_id: gpus_){ HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id)); HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[gpu_id]))); HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles_[gpu_id]))); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Destroyed cuTensorNet contexts for all available GPUs" << std::endl; ctn_handles.clear(); gpus.clear(); ctn_handles_.clear(); gpus_.clear(); } int CuQuantumExecutor::execute(std::shared_ptr<numerics::TensorNetwork> network, TensorOpExecHandle exec_handle) const TensorOpExecHandle exec_handle) { int error_code = 0; //`Finish Loading @@ -101,14 +104,14 @@ int CuQuantumExecutor::execute(std::shared_ptr<numerics::TensorNetwork> network, } bool CuQuantumExecutor::executing(TensorOpExecHandle exec_handle) bool CuQuantumExecutor::executing(const TensorOpExecHandle exec_handle) { auto iter = active_networks_.find(exec_handle); return (iter != active_networks_.end()); } bool CuQuantumExecutor::sync(TensorOpExecHandle exec_handle, bool CuQuantumExecutor::sync(const TensorOpExecHandle exec_handle, int * error_code, bool wait) { Loading src/runtime/executor/cuquantum/cuquantum_executor.hpp +19 −7 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -18,19 +18,29 @@ Rationale: #include <unordered_map> #include <vector> #include <functional> #include "tensor_network_queue.hpp" namespace talsh{ class Tensor; } namespace exatn { namespace runtime { using TensorImplFunc = std::function<const void*(const numerics::Tensor &, int, int, std::size_t *)>; using TensorImplTalshFunc = std::function<std::shared_ptr<talsh::Tensor>(const numerics::Tensor &, int, int)>; struct TensorNetworkReq; class CuQuantumExecutor { public: CuQuantumExecutor(); CuQuantumExecutor(TensorImplFunc tensor_data_access_func); CuQuantumExecutor(const CuQuantumExecutor &) = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete; Loading @@ -38,11 +48,11 @@ public: virtual ~CuQuantumExecutor(); int execute(std::shared_ptr<numerics::TensorNetwork> network, TensorOpExecHandle exec_handle); const TensorOpExecHandle exec_handle); bool executing(TensorOpExecHandle exec_handle); bool executing(const TensorOpExecHandle exec_handle); bool sync(TensorOpExecHandle exec_handle, bool sync(const TensorOpExecHandle exec_handle, int * error_code, bool wait = true); Loading @@ -53,9 +63,11 @@ protected: /** Currently processed tensor networks **/ std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_; /** GPU Ids available to the current process **/ std::vector<int> gpus; std::vector<int> gpus_; /** cuTensorNet contexts for all available GPUs **/ std::vector<void*> ctn_handles; //cutensornetHandle_t = void* std::vector<void*> ctn_handles_; //cutensornetHandle_t = void* /** Tensor data access function **/ TensorImplFunc tensor_data_access_func_; }; } //namespace runtime Loading src/runtime/executor/cuquantum/tensor_network_queue.hpp +55 −3 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: Execution queue REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -17,6 +17,7 @@ Rationale: #include "tensor_network.hpp" #include "tensor_operation.hpp" #include <unordered_map> #include <list> #include <memory> #include <atomic> Loading @@ -31,6 +32,15 @@ class TensorNetworkQueue { public: //Tensor network execution status: enum class ExecStat { None, //no execution status Idle, //submitted but execution has not yet started Preparing, //preparation for execution has started Executing, //actual execution (numerical computation) has started Completed //execution completed }; using TensorNetworkQueueIterator = std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::iterator; Loading @@ -51,6 +61,7 @@ public: ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();} ConstTensorNetworkQueueIterator cend() {return networks_.cend();} /** Returns TRUE is the tensor network queue is empty, FALSE otherwise. **/ bool isEmpty() { lock(); bool empt = networks_.empty(); Loading @@ -58,6 +69,7 @@ public: return empt; } /** Returns the current size of the tensor network queue. **/ std::size_t getSize() { lock(); const std::size_t current_size = networks_.size(); Loading @@ -65,26 +77,57 @@ public: return current_size; } /** Appends a new tensor network to the queue (no repeats allowed). Upon success, returns a positive execution handle, zero otherwise. **/ TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) { lock(); const TensorOpExecHandle tn_hash = getTensorNetworkHash(network); TensorOpExecHandle tn_hash = getTensorNetworkHash(network); auto res = tn_exec_stat_.emplace(std::make_pair(tn_hash,ExecStat::Idle)); if(res.second){ networks_.emplace_back(std::make_pair(network,tn_hash)); }else{ tn_hash = 0; } unlock(); return tn_hash; } /** Removes the tensor network currently pointed to from the queue. The tensor network execution status must be marked Completed. **/ void remove() { lock(); assert(current_network_ != networks_.end()); auto iter = tn_exec_stat_.find(current_network_->second); if(iter != tn_exec_stat_.end()){ if(iter->second == ExecStat::Completed){ tn_exec_stat_.erase(iter); }else{ std::cout << "#ERROR(exatn::runtime::TensorNetworkQueue): Attempt to delete an unfinished tensor network!\n"; assert(false); } } current_network_ = networks_.erase(current_network_); unlock(); return; } /** Returns the execution status associated with the given tensor network execution handle. **/ ExecStat checkExecStatus(const TensorOpExecHandle exec_handle) { auto exec_stat = ExecStat::None; lock(); auto iter = tn_exec_stat_.find(exec_handle); if(iter != tn_exec_stat_.cend()) exec_stat = iter->second; unlock(); return exec_stat; } /** Returns the constant iterator to the current tensor network. **/ ConstTensorNetworkQueueIterator getCurrent() { return current_network_; } /** Returns the current iterator to the beginning of the queue. **/ void reset() { lock(); current_network_ = networks_.begin(); Loading @@ -92,6 +135,8 @@ public: return; } /** Returns TRUE if the current iterator is positioned after the end of the queue, FALSE otherwise. **/ bool isOver() { lock(); bool over = (current_network_ == networks_.end()); Loading @@ -99,6 +144,9 @@ public: return over; } /** Moves the current iterator to the next element of the queue. If moved past the end, return FALSE, otherwise TRUE. The current iterator must be valid on entrance. **/ bool next() { lock(); assert(current_network_ != networks_.end()); Loading @@ -108,14 +156,18 @@ public: return not_over; } /** Locks. **/ inline void lock(){queue_lock_.lock();} inline void unlock(){queue_lock_.unlock();} protected: /** Tensor network execution status **/ std::unordered_map<TensorOpExecHandle,ExecStat> tn_exec_stat_; /** Queue of tensor networks to be executed **/ std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>, TensorOpExecHandle>> networks_; /** Tensor network iterator **/ TensorNetworkQueueIterator current_network_; std::mutex queue_lock_; }; Loading src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp +18 −7 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -30,7 +30,14 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no { TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank); #ifdef CUQUANTUM if(node_executor) cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(); if(node_executor){ cuquantum_executor_ = std::make_shared<CuQuantumExecutor>( [this](const numerics::Tensor & tensor, int device_kind, int device_id, std::size_t * size){ const void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size); return data_ptr; } ); } #endif return; } Loading Loading @@ -268,10 +275,12 @@ void LazyGraphExecutor::execute(TensorGraph & dag) { void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum\n"; std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum: " << tensor_network_queue.getSize() << " elements detected" << std::endl; #ifdef CUQUANTUM assert(node_executor_); //Synchronize the node executor: node_executor_->sync(); bool synced = node_executor_->sync(); assert(synced); node_executor_->clearCache(); //Process the tensor network queue: while(!tensor_network_queue.isEmpty()){ Loading @@ -282,7 +291,7 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { const auto exec_handle = current->second; if(cuquantum_executor_->executing(exec_handle)){ int error_code = 0; auto synced = cuquantum_executor_->sync(exec_handle,&error_code,false); synced = cuquantum_executor_->sync(exec_handle,&error_code,false); assert(error_code == 0); if(synced){ tensor_network_queue.remove(); Loading @@ -299,7 +308,9 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { } } } bool synced = cuquantum_executor_->sync(); assert(synced); synced = cuquantum_executor_->sync(); assert(synced); #else assert(tensor_network_queue.isEmpty()); #endif std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n"; return; Loading src/runtime/executor/node_executors/exatensor/node_executor_exatensor.hpp +6 −1 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph node executor: Exatensor REVISION: 2021/21/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -87,6 +87,11 @@ public: std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor, const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) override; const void * getTensorImage(const numerics::Tensor & tensor, int device_kind, int device_id, std::size_t * size = nullptr) const override {return nullptr;} const std::string name() const override {return "exatensor-node-executor";} const std::string description() const override {return "ExaTENSOR tensor graph node executor";} std::shared_ptr<TensorNodeExecutor> clone() override {return std::make_shared<ExatensorNodeExecutor>();} Loading Loading
src/runtime/executor/cuquantum/cuquantum_executor.cu +18 −15 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -16,6 +16,7 @@ Rationale: #include <vector> #include <unordered_map> #include <type_traits> #include <iostream> Loading Loading @@ -57,43 +58,45 @@ struct TensorNetworkReq { }; CuQuantumExecutor::CuQuantumExecutor() CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func): tensor_data_access_func_(std::move(tensor_data_access_func)) { static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)"); const size_t version = cutensornetGetVersion(); std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): cuTensorNet backend version " << version << std::endl; int num_gpus = 0; auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS); for(int i = 0; i < num_gpus; ++i){ if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus.emplace_back(i); if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus_.emplace_back(i); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus.size() << std::endl; std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus_.size() << std::endl; ctn_handles.resize(gpus.size()); for(const auto & gpu_id: gpus){ ctn_handles_.resize(gpus_.size()); for(const auto & gpu_id: gpus_){ HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id)); HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[gpu_id]))); HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles_[gpu_id]))); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl; } CuQuantumExecutor::~CuQuantumExecutor() { bool success = sync(); assert(success); for(const auto & gpu_id: gpus){ for(const auto & gpu_id: gpus_){ HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id)); HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[gpu_id]))); HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles_[gpu_id]))); } std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Destroyed cuTensorNet contexts for all available GPUs" << std::endl; ctn_handles.clear(); gpus.clear(); ctn_handles_.clear(); gpus_.clear(); } int CuQuantumExecutor::execute(std::shared_ptr<numerics::TensorNetwork> network, TensorOpExecHandle exec_handle) const TensorOpExecHandle exec_handle) { int error_code = 0; //`Finish Loading @@ -101,14 +104,14 @@ int CuQuantumExecutor::execute(std::shared_ptr<numerics::TensorNetwork> network, } bool CuQuantumExecutor::executing(TensorOpExecHandle exec_handle) bool CuQuantumExecutor::executing(const TensorOpExecHandle exec_handle) { auto iter = active_networks_.find(exec_handle); return (iter != active_networks_.end()); } bool CuQuantumExecutor::sync(TensorOpExecHandle exec_handle, bool CuQuantumExecutor::sync(const TensorOpExecHandle exec_handle, int * error_code, bool wait) { Loading
src/runtime/executor/cuquantum/cuquantum_executor.hpp +19 −7 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -18,19 +18,29 @@ Rationale: #include <unordered_map> #include <vector> #include <functional> #include "tensor_network_queue.hpp" namespace talsh{ class Tensor; } namespace exatn { namespace runtime { using TensorImplFunc = std::function<const void*(const numerics::Tensor &, int, int, std::size_t *)>; using TensorImplTalshFunc = std::function<std::shared_ptr<talsh::Tensor>(const numerics::Tensor &, int, int)>; struct TensorNetworkReq; class CuQuantumExecutor { public: CuQuantumExecutor(); CuQuantumExecutor(TensorImplFunc tensor_data_access_func); CuQuantumExecutor(const CuQuantumExecutor &) = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete; Loading @@ -38,11 +48,11 @@ public: virtual ~CuQuantumExecutor(); int execute(std::shared_ptr<numerics::TensorNetwork> network, TensorOpExecHandle exec_handle); const TensorOpExecHandle exec_handle); bool executing(TensorOpExecHandle exec_handle); bool executing(const TensorOpExecHandle exec_handle); bool sync(TensorOpExecHandle exec_handle, bool sync(const TensorOpExecHandle exec_handle, int * error_code, bool wait = true); Loading @@ -53,9 +63,11 @@ protected: /** Currently processed tensor networks **/ std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_; /** GPU Ids available to the current process **/ std::vector<int> gpus; std::vector<int> gpus_; /** cuTensorNet contexts for all available GPUs **/ std::vector<void*> ctn_handles; //cutensornetHandle_t = void* std::vector<void*> ctn_handles_; //cutensornetHandle_t = void* /** Tensor data access function **/ TensorImplFunc tensor_data_access_func_; }; } //namespace runtime Loading
src/runtime/executor/cuquantum/tensor_network_queue.hpp +55 −3 Original line number Diff line number Diff line /** ExaTN: Tensor Runtime: Tensor network executor: Execution queue REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading @@ -17,6 +17,7 @@ Rationale: #include "tensor_network.hpp" #include "tensor_operation.hpp" #include <unordered_map> #include <list> #include <memory> #include <atomic> Loading @@ -31,6 +32,15 @@ class TensorNetworkQueue { public: //Tensor network execution status: enum class ExecStat { None, //no execution status Idle, //submitted but execution has not yet started Preparing, //preparation for execution has started Executing, //actual execution (numerical computation) has started Completed //execution completed }; using TensorNetworkQueueIterator = std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::iterator; Loading @@ -51,6 +61,7 @@ public: ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();} ConstTensorNetworkQueueIterator cend() {return networks_.cend();} /** Returns TRUE is the tensor network queue is empty, FALSE otherwise. **/ bool isEmpty() { lock(); bool empt = networks_.empty(); Loading @@ -58,6 +69,7 @@ public: return empt; } /** Returns the current size of the tensor network queue. **/ std::size_t getSize() { lock(); const std::size_t current_size = networks_.size(); Loading @@ -65,26 +77,57 @@ public: return current_size; } /** Appends a new tensor network to the queue (no repeats allowed). Upon success, returns a positive execution handle, zero otherwise. **/ TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) { lock(); const TensorOpExecHandle tn_hash = getTensorNetworkHash(network); TensorOpExecHandle tn_hash = getTensorNetworkHash(network); auto res = tn_exec_stat_.emplace(std::make_pair(tn_hash,ExecStat::Idle)); if(res.second){ networks_.emplace_back(std::make_pair(network,tn_hash)); }else{ tn_hash = 0; } unlock(); return tn_hash; } /** Removes the tensor network currently pointed to from the queue. The tensor network execution status must be marked Completed. **/ void remove() { lock(); assert(current_network_ != networks_.end()); auto iter = tn_exec_stat_.find(current_network_->second); if(iter != tn_exec_stat_.end()){ if(iter->second == ExecStat::Completed){ tn_exec_stat_.erase(iter); }else{ std::cout << "#ERROR(exatn::runtime::TensorNetworkQueue): Attempt to delete an unfinished tensor network!\n"; assert(false); } } current_network_ = networks_.erase(current_network_); unlock(); return; } /** Returns the execution status associated with the given tensor network execution handle. **/ ExecStat checkExecStatus(const TensorOpExecHandle exec_handle) { auto exec_stat = ExecStat::None; lock(); auto iter = tn_exec_stat_.find(exec_handle); if(iter != tn_exec_stat_.cend()) exec_stat = iter->second; unlock(); return exec_stat; } /** Returns the constant iterator to the current tensor network. **/ ConstTensorNetworkQueueIterator getCurrent() { return current_network_; } /** Returns the current iterator to the beginning of the queue. **/ void reset() { lock(); current_network_ = networks_.begin(); Loading @@ -92,6 +135,8 @@ public: return; } /** Returns TRUE if the current iterator is positioned after the end of the queue, FALSE otherwise. **/ bool isOver() { lock(); bool over = (current_network_ == networks_.end()); Loading @@ -99,6 +144,9 @@ public: return over; } /** Moves the current iterator to the next element of the queue. If moved past the end, return FALSE, otherwise TRUE. The current iterator must be valid on entrance. **/ bool next() { lock(); assert(current_network_ != networks_.end()); Loading @@ -108,14 +156,18 @@ public: return not_over; } /** Locks. **/ inline void lock(){queue_lock_.lock();} inline void unlock(){queue_lock_.unlock();} protected: /** Tensor network execution status **/ std::unordered_map<TensorOpExecHandle,ExecStat> tn_exec_stat_; /** Queue of tensor networks to be executed **/ std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>, TensorOpExecHandle>> networks_; /** Tensor network iterator **/ TensorNetworkQueueIterator current_network_; std::mutex queue_lock_; }; Loading
src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp +18 −7 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy REVISION: 2021/12/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -30,7 +30,14 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no { TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank); #ifdef CUQUANTUM if(node_executor) cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(); if(node_executor){ cuquantum_executor_ = std::make_shared<CuQuantumExecutor>( [this](const numerics::Tensor & tensor, int device_kind, int device_id, std::size_t * size){ const void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size); return data_ptr; } ); } #endif return; } Loading Loading @@ -268,10 +275,12 @@ void LazyGraphExecutor::execute(TensorGraph & dag) { void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum\n"; std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum: " << tensor_network_queue.getSize() << " elements detected" << std::endl; #ifdef CUQUANTUM assert(node_executor_); //Synchronize the node executor: node_executor_->sync(); bool synced = node_executor_->sync(); assert(synced); node_executor_->clearCache(); //Process the tensor network queue: while(!tensor_network_queue.isEmpty()){ Loading @@ -282,7 +291,7 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { const auto exec_handle = current->second; if(cuquantum_executor_->executing(exec_handle)){ int error_code = 0; auto synced = cuquantum_executor_->sync(exec_handle,&error_code,false); synced = cuquantum_executor_->sync(exec_handle,&error_code,false); assert(error_code == 0); if(synced){ tensor_network_queue.remove(); Loading @@ -299,7 +308,9 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { } } } bool synced = cuquantum_executor_->sync(); assert(synced); synced = cuquantum_executor_->sync(); assert(synced); #else assert(tensor_network_queue.isEmpty()); #endif std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n"; return; Loading
src/runtime/executor/node_executors/exatensor/node_executor_exatensor.hpp +6 −1 Original line number Diff line number Diff line /** ExaTN:: Tensor Runtime: Tensor graph node executor: Exatensor REVISION: 2021/21/24 REVISION: 2021/12/27 Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Loading Loading @@ -87,6 +87,11 @@ public: std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor, const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) override; const void * getTensorImage(const numerics::Tensor & tensor, int device_kind, int device_id, std::size_t * size = nullptr) const override {return nullptr;} const std::string name() const override {return "exatensor-node-executor";} const std::string description() const override {return "ExaTENSOR tensor graph node executor";} std::shared_ptr<TensorNodeExecutor> clone() override {return std::make_shared<ExatensorNodeExecutor>();} Loading