Commit cd548974 authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

cuQuantumExecutor initializes cuQuantum.


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 33322ac3
/** ExaTN::Numerics: Numerical server /** ExaTN::Numerics: Numerical server
REVISION: 2021/11/03 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -84,6 +84,7 @@ namespace exatn{ ...@@ -84,6 +84,7 @@ namespace exatn{
//Primary numerics:: types exposed to the user: //Primary numerics:: types exposed to the user:
using numerics::VectorSpace; using numerics::VectorSpace;
using numerics::Subspace; using numerics::Subspace;
using numerics::TensorHashType;
using numerics::TensorRange; using numerics::TensorRange;
using numerics::TensorShape; using numerics::TensorShape;
using numerics::TensorSignature; using numerics::TensorSignature;
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "errors.hpp" #include "errors.hpp"
//Test activation: //Test activation:
#define EXATN_TEST0 //#define EXATN_TEST0
/*#define EXATN_TEST1 /*#define EXATN_TEST1
#define EXATN_TEST2 #define EXATN_TEST2
#define EXATN_TEST3 #define EXATN_TEST3
...@@ -44,11 +44,11 @@ ...@@ -44,11 +44,11 @@
#define EXATN_TEST23 #define EXATN_TEST23
#define EXATN_TEST24 #define EXATN_TEST24
#define EXATN_TEST25 #define EXATN_TEST25
#define EXATN_TEST26 #define EXATN_TEST26*/
//#define EXATN_TEST27 //requires input file from source //#define EXATN_TEST27 //requires input file from source
//#define EXATN_TEST28 //requires input file from source //#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29*/ #define EXATN_TEST29
#define EXATN_TEST30 //#define EXATN_TEST30
//#define EXATN_TEST31 //requires input file from source //#define EXATN_TEST31 //requires input file from source
//#define EXATN_TEST32 //#define EXATN_TEST32
......
/** ExaTN::Numerics: Tensor network /** ExaTN::Numerics: Tensor network
REVISION: 2021/10/26 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -2779,4 +2779,10 @@ bool TensorNetwork::printTensorNetwork(std::string & network) ...@@ -2779,4 +2779,10 @@ bool TensorNetwork::printTensorNetwork(std::string & network)
} //namespace numerics } //namespace numerics
/** Returns a process-local hash of a tensor network, derived from the
    address of the underlying TensorNetwork object (unique among live objects
    within the current process; not stable across processes or runs). **/
numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network)
{
 void * object_address = static_cast<void*>(network.get());
 return reinterpret_cast<numerics::TensorHashType>(object_address);
}
} //namespace exatn } //namespace exatn
/** ExaTN::Numerics: Tensor network /** ExaTN::Numerics: Tensor network
REVISION: 2021/10/26 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -763,6 +763,9 @@ inline std::shared_ptr<numerics::TensorNetwork> makeSharedTensorNetwork(Args&&.. ...@@ -763,6 +763,9 @@ inline std::shared_ptr<numerics::TensorNetwork> makeSharedTensorNetwork(Args&&..
return std::make_shared<numerics::TensorNetwork>(std::forward<Args>(args)...); return std::make_shared<numerics::TensorNetwork>(std::forward<Args>(args)...);
} }
/** Returns the hash of a tensor network. **/
numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network);
} //namespace exatn } //namespace exatn
#endif //EXATN_NUMERICS_TENSOR_NETWORK_HPP_ #endif //EXATN_NUMERICS_TENSOR_NETWORK_HPP_
...@@ -25,7 +25,7 @@ endif() ...@@ -25,7 +25,7 @@ endif()
if(CUTENSOR AND NOT CUTENSOR_PATH STREQUAL ".") if(CUTENSOR AND NOT CUTENSOR_PATH STREQUAL ".")
target_include_directories(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/include) target_include_directories(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/include)
target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so) target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so ExaTensor::ExaTensor)
endif() endif()
exatn_configure_plugin_rpath(${LIBRARY_NAME}) exatn_configure_plugin_rpath(${LIBRARY_NAME})
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -15,23 +15,80 @@ Rationale: ...@@ -15,23 +15,80 @@ Rationale:
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <vector> #include <vector>
#include <unordered_map>
#include <iostream> #include <iostream>
#include "talshxx.hpp"
#include "cuquantum_executor.hpp" #include "cuquantum_executor.hpp"
//Checks the status returned by a cuTensorNet API call and aborts on failure,
//printing the error string and the source line. The do{...}while(false) wrapper
//makes the macro expand to a single statement, so `if(c) HANDLE_CTN_ERROR(x); else ...`
//is well-formed; the previous `{ ... };` form injected a stray semicolon that
//broke such usage.
#define HANDLE_CTN_ERROR(x) \
do{ \
 const auto err = (x); \
 if( err != CUTENSORNET_STATUS_SUCCESS ){ \
  printf("Error: %s in line %d\n", cutensornetGetErrorString(err), __LINE__); \
  std::abort(); \
 } \
}while(false)
namespace exatn { namespace exatn {
namespace runtime { namespace runtime {
/** Per-tensor description in the form consumed by cuTensorNet:
    the tensor's mode labels and the extent of each mode.
    NOTE(review): `modes` presumably holds cuTensorNet mode ids and `extents`
    the matching dimension sizes in the same order — confirm against the
    network-descriptor setup code. **/
struct TensorDescriptor {
 std::vector<int32_t> modes;   //mode (index) labels, one per tensor dimension
 std::vector<int64_t> extents; //extent of each mode (same ordering as modes)
};
struct TensorNetworkReq { struct TensorNetworkReq {
std::shared_ptr<numerics::TensorNetwork> network; std::shared_ptr<numerics::TensorNetwork> network;
std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors;
std::unordered_map<int32_t,int64_t> index_extents;
cutensornetNetworkDescriptor_t net_descriptor;
cutensornetContractionOptimizerConfig_t opt_config;
cutensornetContractionOptimizerInfo_t opt_info;
cutensornetContractionPlan_t comp_plan;
cudaStream_t stream;
}; };
/** Constructs the cuQuantum executor: reports the cuTensorNet backend version,
    discovers all NVIDIA GPUs usable by the current process via TAL-SH, and
    creates a cuTensorNet context on each available GPU. **/
CuQuantumExecutor::CuQuantumExecutor()
{
 const size_t version = cutensornetGetVersion();
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): cuTensorNet backend version " << version << std::endl;

 //Collect the ids of all on-line NVIDIA GPUs:
 int num_gpus = 0;
 auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS);
 for(int i = 0; i < num_gpus; ++i){
  if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus.emplace_back(i);
 }
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus.size() << std::endl;

 //ctn_handles is indexed by GPU device id below, so it must span the full
 //device-id range [0,num_gpus): sizing it as gpus.size() (the previous code)
 //overflows ctn_handles[gpu_id] whenever device ids are non-contiguous
 //(i.e. some GPUs are not in the DEV_ON state).
 ctn_handles.resize(num_gpus);
 for(const auto & gpu_id: gpus){
  auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess);
  HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[gpu_id])));
 }
 std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;
}
/** Destructor: synchronizes on all outstanding work, then destroys the
    cuTensorNet context of every GPU used by this executor. **/
CuQuantumExecutor::~CuQuantumExecutor()
{
 //All submitted work must have completed before contexts are torn down:
 bool success = sync(); assert(success);
 for(const auto & gpu_id: gpus){
  auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess);
  //NOTE(review): ctn_handles is indexed by device id here, but the constructor
  //sized it as gpus.size() — out of bounds if device ids are non-contiguous; confirm.
  HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[gpu_id])));
 }
 ctn_handles.clear();
 gpus.clear();
}
/** Synchronizes on all outstanding executor work.
    Currently a stub that always reports success. **/
bool CuQuantumExecutor::sync()
{
 //`Finish
 return true;
}
} //namespace runtime } //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum /** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -17,6 +17,7 @@ Rationale: ...@@ -17,6 +17,7 @@ Rationale:
#define EXATN_RUNTIME_CUQUANTUM_EXECUTOR_HPP_ #define EXATN_RUNTIME_CUQUANTUM_EXECUTOR_HPP_
#include <unordered_map> #include <unordered_map>
#include <vector>
#include "tensor_network_queue.hpp" #include "tensor_network_queue.hpp"
...@@ -25,7 +26,6 @@ namespace runtime { ...@@ -25,7 +26,6 @@ namespace runtime {
struct TensorNetworkReq; struct TensorNetworkReq;
class CuQuantumExecutor { class CuQuantumExecutor {
public: public:
...@@ -35,10 +35,10 @@ public: ...@@ -35,10 +35,10 @@ public:
CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete;
CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete; CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete;
CuQuantumExecutor & operator=(CuQuantumExecutor &&) noexcept = delete; CuQuantumExecutor & operator=(CuQuantumExecutor &&) noexcept = delete;
virtual ~CuQuantumExecutor() = default; virtual ~CuQuantumExecutor();
int execute(std::shared_ptr<numerics::TensorNetwork> network, int execute(std::shared_ptr<numerics::TensorNetwork> network,
TensorOpExecHandle * exec_handle); TensorOpExecHandle exec_handle);
bool sync(TensorOpExecHandle exec_handle, bool sync(TensorOpExecHandle exec_handle,
int * error_code, int * error_code,
...@@ -49,7 +49,11 @@ public: ...@@ -49,7 +49,11 @@ public:
protected: protected:
/** Currently processed tensor networks **/ /** Currently processed tensor networks **/
std::unordered_map<TensorOpExecHandle,std::unique_ptr<TensorNetworkReq>> active_networks_; std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_;
/** GPU Ids available to the current process **/
std::vector<int> gpus;
/** cuTensorNet contexts for all available GPUs **/
std::vector<void*> ctn_handles; //cutensornetHandle_t
}; };
} //namespace runtime } //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: Execution queue /** ExaTN: Tensor Runtime: Tensor network executor: Execution queue
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -31,20 +31,76 @@ class TensorNetworkQueue { ...@@ -31,20 +31,76 @@ class TensorNetworkQueue {
public: public:
TensorNetworkQueue() = default; using TensorNetworkQueueIterator =
std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::iterator;
using ConstTensorNetworkQueueIterator =
std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::const_iterator;
TensorNetworkQueue(): current_network_(networks_.end()) {
}
TensorNetworkQueue(const TensorNetworkQueue &) = delete; TensorNetworkQueue(const TensorNetworkQueue &) = delete;
TensorNetworkQueue & operator=(const TensorNetworkQueue &) = delete; TensorNetworkQueue & operator=(const TensorNetworkQueue &) = delete;
TensorNetworkQueue(TensorNetworkQueue &&) noexcept = delete; TensorNetworkQueue(TensorNetworkQueue &&) noexcept = delete;
TensorNetworkQueue & operator=(TensorNetworkQueue &&) noexcept = delete; TensorNetworkQueue & operator=(TensorNetworkQueue &&) noexcept = delete;
~TensorNetworkQueue() = default; ~TensorNetworkQueue() = default;
//Iteration over the queued tensor networks.
//NOTE(review): these accessors do not take the queue lock — presumably callers
//are expected to synchronize externally (via lock()/unlock()); confirm.
TensorNetworkQueueIterator begin() {return networks_.begin();}
TensorNetworkQueueIterator end() {return networks_.end();}
ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();}
ConstTensorNetworkQueueIterator cend() {return networks_.cend();}
bool is_empty() {
lock();
bool empt = networks_.empty();
unlock();
return empt;
}
/** Appends a tensor network to the back of the queue and returns its
    execution handle (the hash of the tensor network). **/
TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) {
 //The hash is a pure function of the network pointer, so it is computed
 //outside the critical section:
 const TensorOpExecHandle handle = getTensorNetworkHash(network);
 lock();
 networks_.emplace_back(network,handle);
 unlock();
 return handle;
}
//Returns an iterator to the currently selected tensor network
//(networks_.end() when unset or exhausted).
//NOTE(review): the read is not protected by the queue lock — confirm that
//callers synchronize externally.
ConstTensorNetworkQueueIterator getCurrent() {
 return current_network_;
}
/** Moves the current-network cursor back to the front of the queue (thread-safe). **/
void reset() {
 lock();
 current_network_ = networks_.begin();
 unlock();
}
/** Returns TRUE if the current-network cursor has passed the last
    queued tensor network (thread-safe). **/
bool is_over() {
 lock();
 const bool at_end = (current_network_ == networks_.end());
 unlock();
 return at_end;
}
/** Advances the current-network cursor to the next queued tensor network.
    Returns TRUE if the cursor still points to a valid network afterwards,
    FALSE if it has moved past the end. Precondition: the cursor is not
    already at the end (asserted). **/
bool next() {
 lock();
 assert(current_network_ != networks_.end());
 ++current_network_;
 //The result must be computed while still holding the lock: the previous code
 //evaluated (current_network_ != networks_.end()) after unlock(), racing with
 //concurrent append()/reset() calls.
 const bool have_more = (current_network_ != networks_.end());
 unlock();
 return have_more;
}
inline void lock(){queue_lock_.lock();} inline void lock(){queue_lock_.lock();}
inline void unlock(){queue_lock_.unlock();} inline void unlock(){queue_lock_.unlock();}
protected: protected:
/** Queue of tensor networks to be executed **/
std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>, std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,
TensorOpExecHandle>> networks_; TensorOpExecHandle>> networks_;
TensorNetworkQueueIterator current_network_;
std::mutex queue_lock_; std::mutex queue_lock_;
}; };
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Eager /** ExaTN:: Tensor Runtime: Tensor graph executor: Eager
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Tiffany Mintz, Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2021 Tiffany Mintz, Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -89,6 +89,7 @@ void EagerGraphExecutor::execute(TensorGraph & dag) { ...@@ -89,6 +89,7 @@ void EagerGraphExecutor::execute(TensorGraph & dag) {
void EagerGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { void EagerGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
std::cout << "#FATAL(exatn::runtime::EagerGraphExecutor::execute): Processing of entire tensor networks is not implemented!\n";
assert(false); assert(false);
} }
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -23,6 +23,19 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) ...@@ -23,6 +23,19 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
namespace exatn { namespace exatn {
namespace runtime { namespace runtime {
/** Sets/resets the DAG node executor (tensor operation executor) by delegating
    to the base class and, when built with cuQuantum support, (re)creates the
    cuQuantum executor used for whole-tensor-network processing. **/
void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
 const ParamConf & parameters,
 unsigned int process_rank,
 unsigned int global_process_rank)
{
 TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank);
#ifdef CUQUANTUM
 //Constructing CuQuantumExecutor initializes cuTensorNet contexts on all available GPUs:
 cuquantum_executor_ = std::make_shared<CuQuantumExecutor>();
#endif
 return;
}
void LazyGraphExecutor::execute(TensorGraph & dag) { void LazyGraphExecutor::execute(TensorGraph & dag) {
struct Progress { struct Progress {
...@@ -255,7 +268,9 @@ void LazyGraphExecutor::execute(TensorGraph & dag) { ...@@ -255,7 +268,9 @@ void LazyGraphExecutor::execute(TensorGraph & dag) {
void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) { void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
#ifdef CUQUANTUM
//`Implement
#endif
return; return;
} }
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy /** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Alex McCaskey Copyright (C) 2018-2021 Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -16,6 +16,10 @@ Rationale: ...@@ -16,6 +16,10 @@ Rationale:
namespace exatn { namespace exatn {
namespace runtime { namespace runtime {
#ifdef CUQUANTUM
class CuQuantumExecutor;
#endif
class LazyGraphExecutor : public TensorGraphExecutor { class LazyGraphExecutor : public TensorGraphExecutor {
public: public:
...@@ -25,7 +29,8 @@ public: ...@@ -25,7 +29,8 @@ public:
LazyGraphExecutor(): pipeline_depth_(DEFAULT_PIPELINE_DEPTH), LazyGraphExecutor(): pipeline_depth_(DEFAULT_PIPELINE_DEPTH),
prefetch_depth_(DEFAULT_PREFETCH_DEPTH) prefetch_depth_(DEFAULT_PREFETCH_DEPTH)
{} {
}
//LazyGraphExecutor(const LazyGraphExecutor &) = delete; //LazyGraphExecutor(const LazyGraphExecutor &) = delete;
//LazyGraphExecutor & operator=(const LazyGraphExecutor &) = delete; //LazyGraphExecutor & operator=(const LazyGraphExecutor &) = delete;
...@@ -34,6 +39,12 @@ public: ...@@ -34,6 +39,12 @@ public:
virtual ~LazyGraphExecutor() = default; virtual ~LazyGraphExecutor() = default;
/** Sets/resets the DAG node executor (tensor operation executor). **/
virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
const ParamConf & parameters,
unsigned int process_rank,
unsigned int global_process_rank) override;
/** Traverses the DAG and executes all its nodes. **/ /** Traverses the DAG and executes all its nodes. **/
virtual void execute(TensorGraph & dag) override; virtual void execute(TensorGraph & dag) override;
...@@ -62,8 +73,11 @@ public: ...@@ -62,8 +73,11 @@ public:
protected: protected:
unsigned int pipeline_depth_; //max number of active tensor operations in flight unsigned int pipeline_depth_; //max number of active tensor operations in flight
unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight
#ifdef CUQUANTUM
std::shared_ptr<CuQuantumExecutor> cuquantum_executor_; //cuQuantum executor
#endif
}; };
} //namespace runtime } //namespace runtime
......
/** ExaTN:: Tensor Runtime: Tensor graph executor /** ExaTN:: Tensor Runtime: Tensor graph executor
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -60,10 +60,10 @@ public: ...@@ -60,10 +60,10 @@ public:
} }
/** Sets/resets the DAG node executor (tensor operation executor). **/ /** Sets/resets the DAG node executor (tensor operation executor). **/
void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor, virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
const ParamConf & parameters, const ParamConf & parameters,
unsigned int process_rank, unsigned int process_rank,
unsigned int global_process_rank) { unsigned int global_process_rank) {
process_rank_.store(process_rank); process_rank_.store(process_rank);
global_process_rank_.store(global_process_rank); global_process_rank_.store(global_process_rank);
node_executor_ = node_executor; node_executor_ = node_executor;
......
/** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations /** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations
REVISION: 2021/12/21 REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
...@@ -242,6 +242,17 @@ VertexIdType TensorRuntime::submit(std::shared_ptr<TensorOperation> op) { ...@@ -242,6 +242,17 @@ VertexIdType TensorRuntime::submit(std::shared_ptr<TensorOperation> op) {
} }
#ifdef CUQUANTUM
/** Submits an entire tensor network for processing by appending it to the
    tensor network queue. On success returns TRUE and stores the network's
    execution handle in *exec_handle; returns FALSE if exec_handle is null. **/
bool TensorRuntime::submit(std::shared_ptr<numerics::TensorNetwork> network,
 TensorOpExecHandle * exec_handle)
{
 assert(exec_handle != nullptr);
 //The assert above is compiled out under NDEBUG; guard explicitly so a null
 //output pointer cannot be dereferenced in release builds:
 if(exec_handle == nullptr) return false;
 *exec_handle = tensor_network_queue_.append(network);
 return true;
}
#endif
bool TensorRuntime::sync(TensorOperation & op, bool wait) {