Commit cd548974 authored by Dmitry I. Lyakh

cuQuantumExecutor initializes cuQuantum.


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 33322ac3
/** ExaTN::Numerics: Numerical server
-REVISION: 2021/11/03
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -84,6 +84,7 @@ namespace exatn{
//Primary numerics:: types exposed to the user:
using numerics::VectorSpace;
using numerics::Subspace;
+using numerics::TensorHashType;
using numerics::TensorRange;
using numerics::TensorShape;
using numerics::TensorSignature;
......
@@ -18,7 +18,7 @@
#include "errors.hpp"
//Test activation:
-#define EXATN_TEST0
+//#define EXATN_TEST0
/*#define EXATN_TEST1
#define EXATN_TEST2
#define EXATN_TEST3
@@ -44,11 +44,11 @@
#define EXATN_TEST23
#define EXATN_TEST24
#define EXATN_TEST25
-#define EXATN_TEST26
+#define EXATN_TEST26*/
//#define EXATN_TEST27 //requires input file from source
//#define EXATN_TEST28 //requires input file from source
-#define EXATN_TEST29*/
-#define EXATN_TEST30
+#define EXATN_TEST29
+//#define EXATN_TEST30
//#define EXATN_TEST31 //requires input file from source
//#define EXATN_TEST32
......
/** ExaTN::Numerics: Tensor network
-REVISION: 2021/10/26
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -2779,4 +2779,10 @@ bool TensorNetwork::printTensorNetwork(std::string & network)
} //namespace numerics
+numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network)
+{
+ return reinterpret_cast<numerics::TensorHashType>((void*)(network.get()));
+}
} //namespace exatn
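//Note: the hash returned by getTensorNetworkHash above is a pointer-identity key: it
//is unique only while the TensorNetwork object stays alive, is not stable across runs,
//and does not depend on the network's content. A minimal standalone sketch of this
//idiom (plain C++, illustrative names, no ExaTN dependency):

#include <cstdint>
#include <iostream>
#include <memory>
#include <unordered_map>

using HashType = std::uintptr_t; //stands in for numerics::TensorHashType
struct Network { int id; };      //stands in for numerics::TensorNetwork

HashType pointerHash(const std::shared_ptr<Network> & net) {
 //Reinterpret the object's address as an integer key (same trick as above)
 return reinterpret_cast<HashType>(static_cast<const void*>(net.get()));
}

int main() {
 auto net = std::make_shared<Network>(Network{42});
 std::unordered_map<HashType,std::shared_ptr<Network>> active;
 active.emplace(pointerHash(net),net); //holding the shared_ptr keeps the key unique
 std::cout << "Key " << pointerHash(net) << " -> network " << active[pointerHash(net)]->id << std::endl;
 return 0;
}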
/** ExaTN::Numerics: Tensor network
-REVISION: 2021/10/26
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -763,6 +763,9 @@ inline std::shared_ptr<numerics::TensorNetwork> makeSharedTensorNetwork(Args&&...
return std::make_shared<numerics::TensorNetwork>(std::forward<Args>(args)...);
}
+/** Returns the hash of a tensor network. **/
+numerics::TensorHashType getTensorNetworkHash(std::shared_ptr<numerics::TensorNetwork> network);
} //namespace exatn
#endif //EXATN_NUMERICS_TENSOR_NETWORK_HPP_
@@ -25,7 +25,7 @@ endif()
if(CUTENSOR AND NOT CUTENSOR_PATH STREQUAL ".")
target_include_directories(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/include)
-target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so)
+target_link_libraries(${LIBRARY_NAME} PRIVATE ${CUTENSOR_PATH}/lib/11/libcutensor.so ExaTensor::ExaTensor)
endif()
exatn_configure_plugin_rpath(${LIBRARY_NAME})
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -15,23 +15,80 @@ Rationale:
#include <cuda_runtime.h>
#include <vector>
#include <unordered_map>
#include <iostream>
#include "talshxx.hpp"
#include "cuquantum_executor.hpp"
#define HANDLE_CTN_ERROR(x) \
{ const auto err = x; \
if( err != CUTENSORNET_STATUS_SUCCESS ) \
{ printf("Error: %s in line %d\n", cutensornetGetErrorString(err), __LINE__); std::abort(); } \
};
namespace exatn {
namespace runtime {
struct TensorDescriptor {
std::vector<int32_t> modes;
std::vector<int64_t> extents;
};
struct TensorNetworkReq {
std::shared_ptr<numerics::TensorNetwork> network;
std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors;
std::unordered_map<int32_t,int64_t> index_extents;
cutensornetNetworkDescriptor_t net_descriptor;
cutensornetContractionOptimizerConfig_t opt_config;
cutensornetContractionOptimizerInfo_t opt_info;
cutensornetContractionPlan_t comp_plan;
cudaStream_t stream;
};
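//The struct above bundles one tensor network contraction request with the cuTensorNet
//objects it will own. The expected call sequence (a sketch of the standard cuTensorNet
//workflow; not yet wired up in this commit) is:
//
// cutensornetCreateNetworkDescriptor(...) -> net_descriptor
// cutensornetCreateContractionOptimizerConfig(...) -> opt_config
// cutensornetCreateContractionOptimizerInfo(...) -> opt_info
// cutensornetCreateContractionPlan(...) -> comp_plan
// cutensornetContraction(...,stream) //launched on the request's own CUDA stream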
CuQuantumExecutor::CuQuantumExecutor()
{
const size_t version = cutensornetGetVersion();
-std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Version " << version << std::endl;
+std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): cuTensorNet backend version " << version << std::endl;
int num_gpus = 0;
auto error_code = talshDeviceCount(DEV_NVIDIA_GPU,&num_gpus); assert(error_code == TALSH_SUCCESS);
for(int i = 0; i < num_gpus; ++i){
if(talshDeviceState(i,DEV_NVIDIA_GPU) >= DEV_ON) gpus.emplace_back(i);
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Number of available GPUs = " << gpus.size() << std::endl;
ctn_handles.resize(gpus.size());
for(std::size_t i = 0; i < gpus.size(); ++i){ //handles are stored positionally: GPU ids need not be contiguous
 auto cuda_error = cudaSetDevice(gpus[i]); assert(cuda_error == cudaSuccess);
 HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[i])));
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;
}
CuQuantumExecutor::~CuQuantumExecutor()
{
bool success = sync(); assert(success);
for(std::size_t i = 0; i < gpus.size(); ++i){ //positional indexing, matching the constructor
 auto cuda_error = cudaSetDevice(gpus[i]); assert(cuda_error == cudaSuccess);
 HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[i])));
}
ctn_handles.clear();
gpus.clear();
}
bool CuQuantumExecutor::sync()
{
bool success = true;
//`Finish
return success;
}
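//A possible eventual shape for sync() (an assumption, not the committed code): drain
//the CUDA stream of every active network request, then retire completed requests:
//
// bool CuQuantumExecutor::sync()
// {
//  bool success = true;
//  for(auto & net_req: active_networks_){
//   auto cuda_error = cudaStreamSynchronize(net_req.second->stream);
//   success = success && (cuda_error == cudaSuccess);
//  }
//  if(success) active_networks_.clear();
//  return success;
// }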
} //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -17,6 +17,7 @@ Rationale:
#define EXATN_RUNTIME_CUQUANTUM_EXECUTOR_HPP_
#include <unordered_map>
#include <vector>
#include "tensor_network_queue.hpp"
@@ -25,7 +26,6 @@ namespace runtime {
struct TensorNetworkReq;
class CuQuantumExecutor {
public:
@@ -35,10 +35,10 @@ public:
CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete;
CuQuantumExecutor(CuQuantumExecutor &&) noexcept = delete;
CuQuantumExecutor & operator=(CuQuantumExecutor &&) noexcept = delete;
-virtual ~CuQuantumExecutor() = default;
+virtual ~CuQuantumExecutor();
int execute(std::shared_ptr<numerics::TensorNetwork> network,
-            TensorOpExecHandle * exec_handle);
+            TensorOpExecHandle exec_handle);
bool sync(TensorOpExecHandle exec_handle,
int * error_code,
@@ -49,7 +49,11 @@ public:
protected:
/** Currently processed tensor networks **/
-std::unordered_map<TensorOpExecHandle,std::unique_ptr<TensorNetworkReq>> active_networks_;
+std::unordered_map<TensorOpExecHandle,std::shared_ptr<TensorNetworkReq>> active_networks_;
/** GPU Ids available to the current process **/
std::vector<int> gpus;
/** cuTensorNet contexts for all available GPUs **/
std::vector<void*> ctn_handles; //cutensornetHandle_t
};
} //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: Execution queue
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -31,20 +31,76 @@ class TensorNetworkQueue {
public:
-TensorNetworkQueue() = default;
+using TensorNetworkQueueIterator =
+ std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::iterator;
+using ConstTensorNetworkQueueIterator =
+ std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,TensorOpExecHandle>>::const_iterator;
+TensorNetworkQueue(): current_network_(networks_.end()) {
+}
TensorNetworkQueue(const TensorNetworkQueue &) = delete;
TensorNetworkQueue & operator=(const TensorNetworkQueue &) = delete;
TensorNetworkQueue(TensorNetworkQueue &&) noexcept = delete;
TensorNetworkQueue & operator=(TensorNetworkQueue &&) noexcept = delete;
~TensorNetworkQueue() = default;
TensorNetworkQueueIterator begin() {return networks_.begin();}
TensorNetworkQueueIterator end() {return networks_.end();}
ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();}
ConstTensorNetworkQueueIterator cend() {return networks_.cend();}
bool is_empty() {
lock();
bool empt = networks_.empty();
unlock();
return empt;
}
TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) {
lock();
const TensorOpExecHandle tn_hash = getTensorNetworkHash(network);
networks_.emplace_back(std::make_pair(network,tn_hash));
unlock();
return tn_hash;
}
ConstTensorNetworkQueueIterator getCurrent() {
return current_network_;
}
void reset() {
lock();
current_network_ = networks_.begin();
unlock();
return;
}
bool is_over() {
lock();
bool over = (current_network_ == networks_.end());
unlock();
return over;
}
bool next() {
 lock();
 assert(current_network_ != networks_.end());
 ++current_network_;
 bool not_over = (current_network_ != networks_.end()); //evaluate under the lock to avoid a race
 unlock();
 return not_over;
}
inline void lock(){queue_lock_.lock();}
inline void unlock(){queue_lock_.unlock();}
protected:
/** Queue of tensor networks to be executed **/
std::list<std::pair<std::shared_ptr<numerics::TensorNetwork>,
TensorOpExecHandle>> networks_;
TensorNetworkQueueIterator current_network_;
std::mutex queue_lock_;
};
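//Intended usage of TensorNetworkQueue (a sketch assuming one producer thread appending
//and one executor thread owning the cursor; method names as declared above):
//
// TensorNetworkQueue queue;
// auto exec_handle = queue.append(network); //producer: enqueue, receive the handle
// queue.reset();                            //executor: cursor to the first entry
// while(!queue.is_over()){
//  auto entry = queue.getCurrent(); //iterator to pair{network, exec_handle}
//  //...process entry->first, report completion under entry->second...
//  if(!queue.next()) break;
// }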
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Eager
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Tiffany Mintz, Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -89,6 +89,7 @@ void EagerGraphExecutor::execute(TensorGraph & dag) {
void EagerGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
std::cout << "#FATAL(exatn::runtime::EagerGraphExecutor::execute): Processing of entire tensor networks is not implemented!\n";
assert(false);
}
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -23,6 +23,19 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
namespace exatn {
namespace runtime {
+void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
+                                          const ParamConf & parameters,
+                                          unsigned int process_rank,
+                                          unsigned int global_process_rank)
+{
+ TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank);
+#ifdef CUQUANTUM
+ cuquantum_executor_ = std::make_shared<CuQuantumExecutor>();
+#endif
+ return;
+}
void LazyGraphExecutor::execute(TensorGraph & dag) {
struct Progress {
@@ -255,7 +268,9 @@ void LazyGraphExecutor::execute(TensorGraph & dag) {
void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
#ifdef CUQUANTUM
//`Implement
#endif
return;
}
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -16,6 +16,10 @@ Rationale:
namespace exatn {
namespace runtime {
+#ifdef CUQUANTUM
+class CuQuantumExecutor;
+#endif
class LazyGraphExecutor : public TensorGraphExecutor {
public:
@@ -25,7 +29,8 @@ public:
LazyGraphExecutor(): pipeline_depth_(DEFAULT_PIPELINE_DEPTH),
prefetch_depth_(DEFAULT_PREFETCH_DEPTH)
-{}
+{
+}
//LazyGraphExecutor(const LazyGraphExecutor &) = delete;
//LazyGraphExecutor & operator=(const LazyGraphExecutor &) = delete;
@@ -34,6 +39,12 @@ public:
virtual ~LazyGraphExecutor() = default;
+/** Sets/resets the DAG node executor (tensor operation executor). **/
+virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
+                               const ParamConf & parameters,
+                               unsigned int process_rank,
+                               unsigned int global_process_rank) override;
/** Traverses the DAG and executes all its nodes. **/
virtual void execute(TensorGraph & dag) override;
@@ -62,8 +73,11 @@ public:
protected:
-unsigned int pipeline_depth_; //max number of active tensor operations in flight
-unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight
+unsigned int pipeline_depth_; //max number of active tensor operations in flight
+unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight
+#ifdef CUQUANTUM
+std::shared_ptr<CuQuantumExecutor> cuquantum_executor_; //cuQuantum executor
+#endif
};
} //namespace runtime
......
/** ExaTN:: Tensor Runtime: Tensor graph executor
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -60,10 +60,10 @@ public:
}
/** Sets/resets the DAG node executor (tensor operation executor). **/
-void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
-                       const ParamConf & parameters,
-                       unsigned int process_rank,
-                       unsigned int global_process_rank) {
+virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
+                               const ParamConf & parameters,
+                               unsigned int process_rank,
+                               unsigned int global_process_rank) {
process_rank_.store(process_rank);
global_process_rank_.store(global_process_rank);
node_executor_ = node_executor;
......
/** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -242,6 +242,17 @@ VertexIdType TensorRuntime::submit(std::shared_ptr<TensorOperation> op) {
}
+#ifdef CUQUANTUM
+bool TensorRuntime::submit(std::shared_ptr<numerics::TensorNetwork> network,
+                           TensorOpExecHandle * exec_handle)
+{
+ assert(exec_handle != nullptr);
+ *exec_handle = tensor_network_queue_.append(network);
+ return true;
+}
+#endif
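//How a client might use the new entry point (illustrative sketch; assumes CUQUANTUM is
//defined and `runtime` is a constructed TensorRuntime):
//
// TensorOpExecHandle exec_handle;
// bool submitted = runtime.submit(network,&exec_handle); //queues the whole network
// //exec_handle identifies this network in the execution queue and can later be used
// //to test for completion once full processing is implemented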
bool TensorRuntime::sync(TensorOperation & op, bool wait) {
assert(currentScopeIsSet());
executing_.store(true); //reactivate the execution thread to execute the DAG in case it was not active
......
/** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations
-REVISION: 2021/12/21
+REVISION: 2021/12/22
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
@@ -125,8 +125,14 @@ public:
/** Returns TRUE if the current scope is set. **/
inline bool currentScopeIsSet() const {return scope_set_.load();}
-/** Submits a tensor operation into the current execution graph and returns its integer id. **/
-VertexIdType submit(std::shared_ptr<TensorOperation> op);
+/** Submits a tensor operation into the current execution graph and returns its integer id. **/
+VertexIdType submit(std::shared_ptr<TensorOperation> op); //in: tensor operation
+#ifdef CUQUANTUM
+/** Submits an entire tensor network for processing as a whole. **/
+bool submit(std::shared_ptr<numerics::TensorNetwork> network, //in: tensor network
+            TensorOpExecHandle * exec_handle = nullptr); //out: assigned execution handle
+#endif
/** Tests for completion of a given tensor operation.
If wait = TRUE, it will block until completion. **/
@@ -138,7 +144,7 @@ public:
bool sync(const Tensor & tensor,
bool wait = true);
-/** Tests for completion of all tensor operations in the current DAG.
+/** Tests for completion of all previously submitted tensor operations.
If wait = TRUE, it will block until completion. **/
bool sync(bool wait = true);
......