diff --git a/src/exatn/num_server.hpp b/src/exatn/num_server.hpp index de9b573e29ca2162a98a25a6dbbffa5eed557c12..07c75b9838f10cd1785f6ac6e13b04700208bdd0 100644 --- a/src/exatn/num_server.hpp +++ b/src/exatn/num_server.hpp @@ -1,5 +1,5 @@ /** ExaTN::Numerics: Numerical server -REVISION: 2021/11/03 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ @@ -84,6 +84,7 @@ namespace exatn{ //Primary numerics:: types exposed to the user: using numerics::VectorSpace; using numerics::Subspace; +using numerics::TensorHashType; using numerics::TensorRange; using numerics::TensorShape; using numerics::TensorSignature; diff --git a/src/exatn/tests/NumServerTester.cpp b/src/exatn/tests/NumServerTester.cpp index c280fac1b078dc7c78952837762463ff3c655d91..ce4d34db25adfce8512cdcc44461a656d71d4510 100644 --- a/src/exatn/tests/NumServerTester.cpp +++ b/src/exatn/tests/NumServerTester.cpp @@ -18,7 +18,7 @@ #include "errors.hpp" //Test activation: -#define EXATN_TEST0 +//#define EXATN_TEST0 /*#define EXATN_TEST1 #define EXATN_TEST2 #define EXATN_TEST3 @@ -44,11 +44,11 @@ #define EXATN_TEST23 #define EXATN_TEST24 #define EXATN_TEST25 -#define EXATN_TEST26 +#define EXATN_TEST26*/ //#define EXATN_TEST27 //requires input file from source //#define EXATN_TEST28 //requires input file from source -#define EXATN_TEST29*/ -#define EXATN_TEST30 +#define EXATN_TEST29 +//#define EXATN_TEST30 //#define EXATN_TEST31 //requires input file from source //#define EXATN_TEST32 diff --git a/src/numerics/tensor_network.cpp b/src/numerics/tensor_network.cpp index 4814f596b6e68a79516e6e5e4fc7876b4666714e..18776256487578512cd736718341d9dedd11e045 100644 --- a/src/numerics/tensor_network.cpp +++ b/src/numerics/tensor_network.cpp @@ -1,5 +1,5 @@ /** ExaTN::Numerics: Tensor network -REVISION: 2021/10/26 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ @@ -2779,4 +2779,10 @@ bool TensorNetwork::printTensorNetwork(std::string & network) } //namespace numerics + +numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network) +{ + return reinterpret_cast<numerics::TensorHashType>((void*)(network.get())); +} + } //namespace exatn diff --git a/src/numerics/tensor_network.hpp b/src/numerics/tensor_network.hpp index c311864de90dd0639f13c7c68cc48b66254ec00a..ddbf5d3f6dd337bb68221bb59e600fd437d453ad 100644 --- a/src/numerics/tensor_network.hpp +++ b/src/numerics/tensor_network.hpp @@ -1,5 +1,5 @@ /** ExaTN::Numerics: Tensor network -REVISION: 2021/10/26 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ @@ -763,6 +763,9 @@ inline std::shared_ptr<numerics::TensorNetwork> makeSharedTensorNetwork(Args&&.. return std::make_shared<numerics::TensorNetwork>(std::forward<Args>(args)...); } +/** Returns the hash of a tensor network. **/ +numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network); + } //namespace exatn #endif //EXATN_NUMERICS_TENSOR_NETWORK_HPP_ diff --git a/src/runtime/executor/cuquantum/CMakeLists.txt b/src/runtime/executor/cuquantum/CMakeLists.txt index 57df9ef7da164857b141438ccbf132446b9a07d2..643a0d2c02430d4fff96e0f05f39a83c5b0e5980 100644 --- a/src/runtime/executor/cuquantum/CMakeLists.txt +++ b/src/runtime/executor/cuquantum/CMakeLists.txt @@ -25,7 +25,7 @@ endif() if(CUTENSOR AND NOT CUTENSOR_PATH STREQUAL ".") target_include_directories(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/include) - target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so) + target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so ExaTensor::ExaTensor) endif() exatn_configure_plugin_rpath(${LIBRARY_NAME}) diff --git a/src/runtime/executor/cuquantum/cuquantum_executor.cu b/src/runtime/executor/cuquantum/cuquantum_executor.cu index f8f9057de7055ae9cce191c7d791d04670e31f69..86a9ddee43190002dacc91c8323ca8775e760814 100644 --- a/src/runtime/executor/cuquantum/cuquantum_executor.cu +++ b/src/runtime/executor/cuquantum/cuquantum_executor.cu @@ -1,5 +1,5 @@ /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -15,23 +15,80 @@ Rationale: #include <cuda_runtime.h> #include <vector> +#include <unordered_map> #include <iostream> +#include "talshxx.hpp" + #include "cuquantum_executor.hpp" +#define HANDLE_CTN_ERROR(x) \ +{ const auto err = x; \ + if( err != CUTENSORNET_STATUS_SUCCESS ) \ +{ printf("Error: %s in line %d\n", cutensornetGetErrorString(err), __LINE__); std::abort(); } \ +}; + + namespace exatn { namespace runtime { +struct TensorDescriptor { + std::vector<int32_t> modes; + std::vector<int64_t> extents; +}; + struct TensorNetworkReq { std::shared_ptr<numerics::TensorNetwork> network; + std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors; + std::unordered_map<int32_t,int64_t> index_extents; + cutensornetNetworkDescriptor_t net_descriptor; + cutensornetContractionOptimizerConfig_t opt_config; + cutensornetContractionOptimizerInfo_t opt_info; + cutensornetContractionPlan_t comp_plan; + cudaStream_t stream; }; CuQuantumExecutor::CuQuantumExecutor() { const size_t version = cutensornetGetVersion(); - std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Version " << version << std::endl; + std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): cuTensorNet backend version " << version << std::endl; + + int num_gpus = 0; + auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS); + for(int i = 0; i < num_gpus; ++i){ + if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus.emplace_back(i); + } + std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus.size() << std::endl; + + ctn_handles.resize(gpus.size()); + for(const auto & gpu_id: gpus){ + auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess); + HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[gpu_id]))); + } + std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl; + +} + + +CuQuantumExecutor::~CuQuantumExecutor() +{ + bool success = sync(); assert(success); + for(const auto & gpu_id: gpus){ + auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess); + HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[gpu_id]))); + } + ctn_handles.clear(); + gpus.clear(); +} + + +bool CuQuantumExecutor::sync() +{ + bool success = true; + //`Finish + return success; } } //namespace runtime diff --git a/src/runtime/executor/cuquantum/cuquantum_executor.hpp b/src/runtime/executor/cuquantum/cuquantum_executor.hpp index f4e7c808a75502615515545d85486ff8b6d0c85d..05b6d874eca9c3f3d5f705a7294be9cf82fc7abb 100644 --- a/src/runtime/executor/cuquantum/cuquantum_executor.hpp +++ b/src/runtime/executor/cuquantum/cuquantum_executor.hpp @@ -1,5 +1,5 @@ /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -17,6 +17,7 @@ Rationale: #define EXATN_RUNTIME_CUQUANTUM_EXECUTOR_HPP_ #include <unordered_map> +#include <vector> #include "tensor_network_queue.hpp" @@ -25,7 +26,6 @@ namespace runtime { struct TensorNetworkReq; - class CuQuantumExecutor { public: @@ -35,10 +35,10 @@ public: CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &&) noexcept = delete; - virtual ~CuQuantumExecutor() = default; + virtual ~CuQuantumExecutor(); int execute(std::shared_ptr<numerics::TensorNetwork> network, - TensorOpExecHandle * exec_handle); + TensorOpExecHandle exec_handle); bool sync(TensorOpExecHandle exec_handle, int * error_code, @@ -49,7 +49,11 @@ public: protected: /** Currently processed tensor networks **/ - std::unordered_map<TensorOpExecHandle,std::unique_ptr<TensorNetworkReq>> active_networks_; + std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_; + /** GPU Ids available to the current process **/ + std::vector<int> gpus; + /** cuTensorNet contexts for all available GPUs **/ + std::vector<void*> ctn_handles; //cutensornetHandle_t }; } //namespace runtime diff --git a/src/runtime/executor/cuquantum/tensor_network_queue.hpp b/src/runtime/executor/cuquantum/tensor_network_queue.hpp index e0fb892a277cb612b0aa1654e6d2480bc766ef94..2a0e08f58e30104c90a79891e1d881d9889bb7d0 100644 --- a/src/runtime/executor/cuquantum/tensor_network_queue.hpp +++ b/src/runtime/executor/cuquantum/tensor_network_queue.hpp @@ -1,5 +1,5 @@ /** ExaTN: Tensor Runtime: Tensor network executor: Execution queue -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -31,20 +31,76 @@ class TensorNetworkQueue { public: - TensorNetworkQueue() = default; + using TensorNetworkQueueIterator = + std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::iterator; + + using ConstTensorNetworkQueueIterator = + std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::const_iterator; + + TensorNetworkQueue(): current_network_(networks_.end()) { + } + TensorNetworkQueue(const TensorNetworkQueue &) = delete; TensorNetworkQueue & operator=(const TensorNetworkQueue &) = delete; TensorNetworkQueue(TensorNetworkQueue &&) noexcept = delete; TensorNetworkQueue & operator=(TensorNetworkQueue &&) noexcept = delete; ~TensorNetworkQueue() = default; + TensorNetworkQueueIterator begin() {return networks_.begin();} + TensorNetworkQueueIterator end() {return networks_.end();} + ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();} + ConstTensorNetworkQueueIterator cend() {return networks_.cend();} + + bool is_empty() { + lock(); + bool empt = networks_.empty(); + unlock(); + return empt; + } + + TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) { + lock(); + const TensorOpExecHandle tn_hash = getTensorNetworkHash(network); + networks_.emplace_back(std::make_pair(network,tn_hash)); + unlock(); + return tn_hash; + } + + ConstTensorNetworkQueueIterator getCurrent() { + return current_network_; + } + + void reset() { + lock(); + current_network_ = networks_.begin(); + unlock(); + return; + } + + bool is_over() { + lock(); + bool over = (current_network_ == networks_.end()); + unlock(); + return over; + } + + bool next() { + lock(); + assert(current_network_ != networks_.end()); + ++current_network_; + unlock(); + return (current_network_ != networks_.end()); + } + inline void lock(){queue_lock_.lock();} inline void unlock(){queue_lock_.unlock();} protected: + /** Queue of tensor networks to be executed **/ std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>, TensorOpExecHandle>> networks_; + TensorNetworkQueueIterator current_network_; std::mutex queue_lock_; }; diff --git a/src/runtime/executor/graph_executors/eager/graph_executor_eager.cpp b/src/runtime/executor/graph_executors/eager/graph_executor_eager.cpp index a43507e90be1648d8d4839f60d4b1bbe3ecbf706..2048c88e8efa22a00726b6a8c0fc86d9812862d1 100644 --- a/src/runtime/executor/graph_executors/eager/graph_executor_eager.cpp +++ b/src/runtime/executor/graph_executors/eager/graph_executor_eager.cpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Tensor graph executor: Eager -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Tiffany Mintz, Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -89,6 +89,7 @@ void EagerGraphExecutor::execute(TensorGraph & dag) { void EagerGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { + std::cout << "#FATAL(exatn::runtime::EagerGraphExecutor::execute): Processing of entire tensor networks is not implemented!\n"; assert(false); } diff --git a/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp b/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp index ab93aa8f4e240576b652fb2a7abdf7ada0c9c65f..3f21dc09e1ea69958ddfc252ede755e24387e10d 100644 --- a/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp +++ b/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -23,6 +23,19 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) namespace exatn { namespace runtime { +void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, + const ParamConf & parameters, + unsigned int process_rank, + unsigned int global_process_rank) +{ + TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank); +#ifdef CUQUANTUM + cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(); +#endif + return; +} + + void LazyGraphExecutor::execute(TensorGraph & dag) { struct Progress { @@ -255,7 +268,9 @@ void LazyGraphExecutor::execute(TensorGraph & dag) { void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { - +#ifdef CUQUANTUM + //`Implement +#endif return; } diff --git a/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp b/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp index 50d1e040cbc22e2dc035980a0d36d7584e720881..a2828f72619a12b1dd1bee104092a02eb89cd6a5 100644 --- a/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp +++ b/src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -16,6 +16,10 @@ Rationale: namespace exatn { namespace runtime { +#ifdef CUQUANTUM +class CuQuantumExecutor; +#endif + class LazyGraphExecutor : public TensorGraphExecutor { public: @@ -25,7 +29,8 @@ public: LazyGraphExecutor(): pipeline_depth_(DEFAULT_PIPELINE_DEPTH), prefetch_depth_(DEFAULT_PREFETCH_DEPTH) - {} + { + } //LazyGraphExecutor(const LazyGraphExecutor &) = delete; //LazyGraphExecutor & operator=(const LazyGraphExecutor &) = delete; @@ -34,6 +39,12 @@ public: virtual ~LazyGraphExecutor() = default; + /** Sets/resets the DAG node executor (tensor operation executor). **/ + virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, + const ParamConf & parameters, + unsigned int process_rank, + unsigned int global_process_rank) override; + /** Traverses the DAG and executes all its nodes. **/ virtual void execute(TensorGraph & dag) override; @@ -62,8 +73,11 @@ public: protected: - unsigned int pipeline_depth_; //max number of active tensor operations in flight - unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight + unsigned int pipeline_depth_; //max number of active tensor operations in flight + unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight +#ifdef CUQUANTUM + std::shared_ptr<CuQuantumExecutor> cuquantum_executor_; //cuQuantum executor +#endif }; } //namespace runtime diff --git a/src/runtime/executor/tensor_graph_executor.hpp b/src/runtime/executor/tensor_graph_executor.hpp index a99d4d1a56057beadb815e9014f69248a490fb35..7bab9792ec13e7f690ff676cd371349d6ffa0c4f 100644 --- a/src/runtime/executor/tensor_graph_executor.hpp +++ b/src/runtime/executor/tensor_graph_executor.hpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Tensor graph executor -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -60,10 +60,10 @@ public: } /** Sets/resets the DAG node executor (tensor operation executor). **/ - void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, - const ParamConf & parameters, - unsigned int process_rank, - unsigned int global_process_rank) { + virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, + const ParamConf & parameters, + unsigned int process_rank, + unsigned int global_process_rank) { process_rank_.store(process_rank); global_process_rank_.store(global_process_rank); node_executor_ = node_executor; diff --git a/src/runtime/tensor_runtime.cpp b/src/runtime/tensor_runtime.cpp index 70f6af48b2c0107a551e67f439ddf579fbf988bf..5f2a28f0984beae770d4fa7c7c65693ae3c89d8f 100644 --- a/src/runtime/tensor_runtime.cpp +++ b/src/runtime/tensor_runtime.cpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -242,6 +242,17 @@ VertexIdType TensorRuntime::submit(std::shared_ptr<TensorOperation> op) { } +#ifdef CUQUANTUM +bool TensorRuntime::submit(std::shared_ptr<numerics::TensorNetwork> network, + TensorOpExecHandle * exec_handle) +{ + assert(exec_handle != nullptr); + *exec_handle = tensor_network_queue_.append(network); + return true; +} +#endif + + bool TensorRuntime::sync(TensorOperation & op, bool wait) { assert(currentScopeIsSet()); executing_.store(true); //reactivate the execution thread to execute the DAG in case it was not active diff --git a/src/runtime/tensor_runtime.hpp b/src/runtime/tensor_runtime.hpp index a8f5ad7b09c959afb510cdfd92cfc275039909c5..38424c33d80ef43709fbbecb0226aa3c82632808 100644 --- a/src/runtime/tensor_runtime.hpp +++ b/src/runtime/tensor_runtime.hpp @@ -1,5 +1,5 @@ /** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations -REVISION: 2021/12/21 +REVISION: 2021/12/22 Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) @@ -125,8 +125,14 @@ public: /** Returns TRUE if there is the current scope is set. **/ inline bool currentScopeIsSet() const {return scope_set_.load();} - /** Submits a tensor operation into the current execution graph and returns its integer id. **/ - VertexIdType submit(std::shared_ptr<TensorOperation> op); + /** Submits a tensor operation into the current execution graph and returns its integer id. **/ + VertexIdType submit(std::shared_ptr<TensorOperation> op); //in: tensor operation + +#ifdef CUQUANTUM + /** Submits an entire tensor network for processing as a whole. **/ + bool submit(std::shared_ptr<numerics::TensorNetwork> network, //in: tensor network + TensorOpExecHandle * exec_handle = nullptr); //out: assigned execution handle +#endif /** Tests for completion of a given tensor operation. If wait = TRUE, it will block until completion. **/ @@ -138,7 +144,7 @@ public: bool sync(const Tensor & tensor, bool wait = true); - /** Tests for completion of all tensor operations in the current DAG. + /** Tests for completion of all previously submitted tensor operations. If wait = TRUE, it will block until completion. **/ bool sync(bool wait = true);