Commit 5e56a54b authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Implemented tensor network queue processing workflow for cuQuantum


Signed-off-by: default avatarDmitry I. Lyakh <quant4me@gmail.com>
parent cd548974
......@@ -18,14 +18,14 @@
#include "errors.hpp"
//Test activation:
//#define EXATN_TEST0
/*#define EXATN_TEST1
/*#define EXATN_TEST0
#define EXATN_TEST1
#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5
#define EXATN_TEST5*/
#define EXATN_TEST6
#define EXATN_TEST7
/*#define EXATN_TEST7
#define EXATN_TEST8
#define EXATN_TEST9
#define EXATN_TEST10
......@@ -47,7 +47,7 @@
#define EXATN_TEST26*/
//#define EXATN_TEST27 //requires input file from source
//#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29
//#define EXATN_TEST29
//#define EXATN_TEST30
//#define EXATN_TEST31 //requires input file from source
//#define EXATN_TEST32
......@@ -852,7 +852,7 @@ TEST(NumServerTester, largeCircuitNumServer)
auto talsh_tensor = exatn::getLocalTensor(inverse.getTensor(0)->getName());
const std::complex<double>* body_ptr;
if (talsh_tensor->getDataAccessHostConst(&body_ptr)) {
std::cout << "Fina result is " << *body_ptr << "\n";
std::cout << "Final result is " << *body_ptr << "\n";
}
}
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/22
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -23,9 +23,16 @@ Rationale:
#include "cuquantum_executor.hpp"
// Checks a CUDA runtime API call; on failure prints a diagnostic and aborts:
#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("Error: %s in line %d\n", cudaGetErrorString(err), __LINE__); std::abort(); } \
};

// Checks a cuTensorNet API call; on failure prints a diagnostic and aborts:
#define HANDLE_CTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("Error: %s in line %d\n", cutensornetGetErrorString(err), __LINE__); std::abort(); } \
};
......@@ -64,7 +71,7 @@ CuQuantumExecutor::CuQuantumExecutor()
ctn_handles.resize(gpus.size());
for(const auto & gpu_id: gpus){
auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess);
HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id));
HANDLE_CTN_ERROR(cutensornetCreate((cutensornetHandle_t*)(&ctn_handles[gpu_id])));
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Created cuTensorNet contexts for all available GPUs" << std::endl;
......@@ -76,19 +83,50 @@ CuQuantumExecutor::~CuQuantumExecutor()
{
bool success = sync(); assert(success);
for(const auto & gpu_id: gpus){
auto cuda_error = cudaSetDevice(gpu_id); assert(cuda_error == cudaSuccess);
HANDLE_CUDA_ERROR(cudaSetDevice(gpu_id));
HANDLE_CTN_ERROR(cutensornetDestroy((cutensornetHandle_t)(ctn_handles[gpu_id])));
}
std::cout << "#DEBUG(exatn::runtime::CuQuantumExecutor): Destroyed cuTensorNet contexts for all available GPUs" << std::endl;
ctn_handles.clear();
gpus.clear();
}
/** Submits a tensor network for execution via cuQuantum.
    NOTE(review): implementation pending (see the `Finish marker); the stub
    currently accepts the network and reports success without doing any work.
    @return 0 (success). **/
int CuQuantumExecutor::execute(std::shared_ptr<numerics::TensorNetwork> network,
                               TensorOpExecHandle exec_handle)
{
 //`Finish
 return 0; // no error
}
/** Returns TRUE if the given execution handle refers to a tensor network
    currently registered as being executed by this executor. **/
bool CuQuantumExecutor::executing(TensorOpExecHandle exec_handle)
{
 return (active_networks_.find(exec_handle) != active_networks_.end());
}
/** Tests (and optionally waits for) completion of a previously submitted
    tensor network execution identified by its execution handle.
    @param exec_handle Handle of the tensor network execution to synchronize on.
    @param error_code Non-null pointer receiving the execution error code (0 = success).
    @param wait If TRUE, blocks until the execution completes (not yet used here).
    @return TRUE if the execution has completed (or was never active). **/
bool CuQuantumExecutor::sync(TensorOpExecHandle exec_handle,
                             int * error_code,
                             bool wait)
{
 bool synced = true;
 assert(error_code != nullptr); //guard: out-parameter must point to valid storage
 *error_code = 0;
 auto iter = active_networks_.find(exec_handle);
 if(iter != active_networks_.end()){
  //`Finish: query/progress the asynchronous cuQuantum execution here
 }
 return synced;
}
/** Synchronizes on completion of all outstanding tensor network executions.
    @return TRUE upon successful synchronization.
    Note: merge residue previously left two local flags and two return
    statements in this body; only the reachable path is kept. **/
bool CuQuantumExecutor::sync()
{
 bool synced = true;
 //`Finish: wait for all active tensor network executions to complete
 return synced;
}
} //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2021/12/22
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -40,6 +40,8 @@ public:
int execute(std::shared_ptr<numerics::TensorNetwork> network,
TensorOpExecHandle exec_handle);
bool executing(TensorOpExecHandle exec_handle);
bool sync(TensorOpExecHandle exec_handle,
int * error_code,
bool wait = true);
......@@ -53,7 +55,7 @@ protected:
/** GPU Ids available to the current process **/
std::vector<int> gpus;
/** cuTensorNet contexts for all available GPUs **/
std::vector<void*> ctn_handles; //cutensornetHandle_t
std::vector<void*> ctn_handles; //cutensornetHandle_t = void*
};
} //namespace runtime
......
/** ExaTN: Tensor Runtime: Tensor network executor: Execution queue
REVISION: 2021/12/22
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -51,13 +51,20 @@ public:
ConstTensorNetworkQueueIterator cbegin() {return networks_.cbegin();}
ConstTensorNetworkQueueIterator cend() {return networks_.cend();}
bool is_empty() {
bool isEmpty() {
lock();
bool empt = networks_.empty();
unlock();
return empt;
}
/** Returns the current number of tensor networks in the queue (thread-safe). **/
std::size_t getSize() {
 lock();
 const auto queue_length = networks_.size();
 unlock();
 return queue_length;
}
TensorOpExecHandle append(std::shared_ptr<numerics::TensorNetwork> network) {
lock();
const TensorOpExecHandle tn_hash = getTensorNetworkHash(network);
......@@ -66,6 +73,14 @@ public:
return tn_hash;
}
/** Removes the queue entry at the current position and advances the
    current-position iterator to the next entry (thread-safe).
    Precondition: the current position must be valid (not end()). **/
void remove() {
 lock();
 assert(current_network_ != networks_.end());
 current_network_ = networks_.erase(current_network_);
 unlock();
}
/** Returns a constant iterator to the current queue position (no locking). **/
ConstTensorNetworkQueueIterator getCurrent() {return current_network_;}
......@@ -77,7 +92,7 @@ public:
return;
}
bool is_over() {
bool isOver() {
lock();
bool over = (current_network_ == networks_.end());
unlock();
......@@ -88,8 +103,9 @@ public:
lock();
assert(current_network_ != networks_.end());
++current_network_;
bool not_over = (current_network_ != networks_.end());
unlock();
return (current_network_ != networks_.end());
return not_over;
}
inline void lock(){queue_lock_.lock();}
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2021/12/22
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -30,7 +30,7 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no
{
TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank);
#ifdef CUQUANTUM
cuquantum_executor_ = std::make_shared<CuQuantumExecutor>();
if(node_executor) cuquantum_executor_ = std::make_shared<CuQuantumExecutor>();
#endif
return;
}
......@@ -268,9 +268,40 @@ void LazyGraphExecutor::execute(TensorGraph & dag) {
/** Drains the given tensor network queue by submitting each network to the
    cuQuantum executor and polling for completion until the queue is empty.
    Only active when built with CUQUANTUM; otherwise just logs and returns. **/
void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum\n";
#ifdef CUQUANTUM
//`Implement
//Synchronize the node executor:
//Flush all work pending in the regular node executor before cuQuantum takes over:
node_executor_->sync();
node_executor_->clearCache();
//Process the tensor network queue:
//Outer loop: keep sweeping the queue until every network has completed and
//been removed (this busy-polls; entries still executing are revisited).
while(!tensor_network_queue.isEmpty()){
tensor_network_queue.reset();
bool not_over = !tensor_network_queue.isOver();
//Inner loop: one pass over the queue entries (pair: network, exec handle).
while(not_over){
const auto current = tensor_network_queue.getCurrent();
const auto exec_handle = current->second;
if(cuquantum_executor_->executing(exec_handle)){
//Already submitted: test for completion without blocking (wait=false).
int error_code = 0;
auto synced = cuquantum_executor_->sync(exec_handle,&error_code,false);
assert(error_code == 0);
if(synced){
//Completed: remove() erases the entry and advances the iterator.
tensor_network_queue.remove();
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Completed tensor network execution via cuQuantum\n";
not_over = !tensor_network_queue.isOver();
}else{
//Still running: leave it in the queue and move on to the next entry.
not_over = tensor_network_queue.next();
}
}else{
//Not yet submitted: hand the network to the cuQuantum executor.
auto error_code = cuquantum_executor_->execute(current->first,exec_handle);
assert(error_code == 0);
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Submitted tensor network to cuQuantum\n";
not_over = tensor_network_queue.next();
}
}
}
//Final barrier: make sure the cuQuantum executor has fully quiesced.
bool synced = cuquantum_executor_->sync(); assert(synced);
#endif
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n";
return;
}
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Exatensor
REVISION: 2021/07/02
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -198,6 +198,12 @@ bool ExatensorNodeExecutor::prefetch(const numerics::TensorOperation & op)
}
/** Clears internal node executor caches. Currently a no-op for this executor
    (the body performs no work). **/
void ExatensorNodeExecutor::clearCache()
{
}
std::shared_ptr<talsh::Tensor> ExatensorNodeExecutor::getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec)
{
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Exatensor
REVISION: 2021/07/02
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -82,6 +82,8 @@ public:
bool prefetch(const numerics::TensorOperation & op) override;
void clearCache() override;
std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) override;
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Talsh
REVISION: 2021/09/24
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -1398,6 +1398,13 @@ bool TalshNodeExecutor::prefetch(const numerics::TensorOperation & op)
}
/** Clears internal node executor caches by evicting tensor copies that were
    moved to accelerator memory. Eviction is best-effort: a FALSE result from
    evictMovedTensors() is intentionally not treated as an error here. **/
void TalshNodeExecutor::clearCache()
{
 const bool evicted = evictMovedTensors();
 (void)evicted; //result intentionally ignored; silences unused-variable warnings
 return;
}
std::shared_ptr<talsh::Tensor> TalshNodeExecutor::getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec)
{
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Talsh
REVISION: 2021/07/23
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -91,6 +91,8 @@ public:
bool prefetch(const numerics::TensorOperation & op) override;
void clearCache() override;
/** Returns a locally stored slice copy of a tensor, or nullptr if no RAM. **/
std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) override;
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor
REVISION: 2021/12/14
REVISION: 2021/12/24
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -110,6 +110,9 @@ public:
/** Initiates tensor operand prefetch for a given tensor operation. **/
virtual bool prefetch(const numerics::TensorOperation & op) = 0;
/** Clears all internal node executor caches **/
virtual void clearCache() = 0;
/** Returns a local copy of a given tensor slice. **/
virtual std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment