Commit ee550dac authored by Dmitry I. Lyakh
Browse files

Implemented CuQuantumExecutor::parseTensorNetwork


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 3ed2fe51
......@@ -44,9 +44,11 @@ namespace runtime {
//Describes one distinct tensor participating in a tensor network
//(keyed by tensor hash; shared by repeated occurrences of the same tensor):
struct TensorDescriptor {
 std::vector<int64_t> extents; //tensor dimension extents
 std::vector<int64_t> strides; //tensor dimension strides (optional; empty => default dense layout)
 cudaDataType_t data_type;     //tensor element data type
 std::size_t volume = 0;       //tensor body volume (number of elements)
 std::size_t size = 0;         //tensor body size (bytes)
 void * src_ptr = nullptr;     //non-owning pointer to the tensor body source image
 std::vector<void*> dst_ptr;   //non-owning pointers to the tensor body dest images (one per GPU)
};
struct TensorNetworkReq {
......@@ -54,14 +56,25 @@ struct TensorNetworkReq {
std::shared_ptr<numerics::TensorNetwork> network; //tensor network specification
std::unordered_map<numerics::TensorHashType,TensorDescriptor> tensor_descriptors; //tensor descriptors (shape, volume, data type, body)
std::unordered_map<unsigned int, std::vector<int32_t>> tensor_modes; //indices associated with tensor dimensions (key is the original tensor id)
std::unordered_map<int32_t,int64_t> index_extents; //extent of each registered tensor mode
std::unordered_map<int32_t,int64_t> mode_extents; //extent of each registered tensor mode
int32_t * num_modes_in = nullptr;
int64_t ** extents_in = nullptr;
int64_t ** strides_in = nullptr;
int32_t ** modes_in = nullptr;
uint32_t * alignments_in = nullptr;
int32_t num_modes_out;
int64_t * extents_out = nullptr;
int64_t * strides_out = nullptr;
int32_t * modes_out = nullptr;
uint32_t alignment_out;
std::vector<void*> memory_window_ptr; //end of the GPU memory segment allocated for the tensors
cutensornetNetworkDescriptor_t net_descriptor;
cutensornetContractionOptimizerConfig_t opt_config;
cutensornetContractionOptimizerInfo_t opt_info;
cutensornetContractionPlan_t comp_plan;
cudaStream_t stream;
cudaDataType_t data_type;
cutensornetComputeType_t compute_type;
cudaStream_t stream;
};
......@@ -188,9 +201,90 @@ void CuQuantumExecutor::sync()
}
/** Maps an ExaTN tensor element type to the corresponding CUDA data type.
    Note: the previous implementation left the result uninitialized when the
    assert compiled out under NDEBUG (undefined behavior on an unsupported
    element type); each case now returns directly and the default case has a
    benign, documented fallback after the debug assertion. **/
cudaDataType_t getCudaDataType(const TensorElementType elem_type)
{
 switch(elem_type){
 case TensorElementType::REAL32: return CUDA_R_32F;
 case TensorElementType::REAL64: return CUDA_R_64F;
 case TensorElementType::COMPLEX32: return CUDA_C_32F;
 case TensorElementType::COMPLEX64: return CUDA_C_64F;
 default:
  assert(false); //unsupported tensor element type
  return CUDA_C_64F; //unreachable in debug builds; defined (if wrong) fallback in release builds
 }
}
/** Parses a tensor network into the argument form required by
    cutensornetCreateNetworkDescriptor: registers a TensorDescriptor per
    distinct tensor (keyed by tensor hash), assigns an int32_t mode label to
    every tensor network index, fills the per-input-tensor arrays, creates the
    cuTensorNet network descriptor, and creates the CUDA stream for the request.
    NOTE(review): the new[]-allocated arrays stored in tn_req are presumably
    released by the TensorNetworkReq destructor — confirm there is no leak on
    an early error path. **/
void CuQuantumExecutor::parseTensorNetwork(std::shared_ptr<TensorNetworkReq> tn_req)
{
 const auto & net = *(tn_req->network);
 const int32_t num_input_tensors = net.getNumTensors();

 //Allocate the per-input-tensor argument arrays:
 tn_req->num_modes_in = new int32_t[num_input_tensors];
 tn_req->extents_in = new int64_t*[num_input_tensors];
 tn_req->strides_in = new int64_t*[num_input_tensors];
 tn_req->modes_in = new int32_t*[num_input_tensors];
 tn_req->alignments_in = new uint32_t[num_input_tensors];

 //NULL strides request the default (dense) tensor layout:
 for(int32_t i = 0; i < num_input_tensors; ++i) tn_req->strides_in[i] = NULL;
 for(int32_t i = 0; i < num_input_tensors; ++i) tn_req->alignments_in[i] = MEM_ALIGNMENT;
 tn_req->strides_out = NULL;
 tn_req->alignment_out = MEM_ALIGNMENT;

 int32_t mode_id = 0;  //last mode label handed out (labels start at 1)
 int32_t tens_num = 0; //running position within the input-tensor arrays
 for(auto iter = net.cbegin(); iter != net.cend(); ++iter){
  const auto tens_id = iter->first;
  const auto & tens = iter->second;
  const auto tens_hash = tens.getTensor()->getTensorHash();
  const auto tens_vol = tens.getTensor()->getVolume();
  const auto tens_rank = tens.getRank();
  const auto tens_type = tens.getElementType();
  const auto & tens_legs = tens.getTensorLegs();
  const auto & tens_dims = tens.getDimExtents();

  //Register the tensor descriptor once per distinct tensor
  //(repeated occurrences of the same tensor share one entry):
  auto res0 = tn_req->tensor_descriptors.emplace(std::make_pair(tens_hash,TensorDescriptor{}));
  if(res0.second){
   auto & descr = res0.first->second;
   descr.extents.resize(tens_rank);
   for(unsigned int i = 0; i < tens_rank; ++i) descr.extents[i] = tens_dims[i];
   descr.data_type = getCudaDataType(tens_type);
   descr.volume = tens_vol;
   descr.src_ptr = tensor_data_access_func_(*(tens.getTensor()),DEV_HOST,0,&(descr.size));
   assert(descr.src_ptr != nullptr);
  }

  //Assign a mode label to each dimension of this tensor. A label is reused
  //when the leg connects to a tensor processed earlier in this loop:
  auto res1 = tn_req->tensor_modes.emplace(std::make_pair(tens_id,std::vector<int32_t>(tens_rank)));
  assert(res1.second);
  for(unsigned int i = 0; i < tens_rank; ++i){
   const auto other_tens_id = tens_legs[i].getTensorId();
   const auto other_tens_leg_id = tens_legs[i].getDimensionId();
   auto other_tens_iter = tn_req->tensor_modes.find(other_tens_id);
   if(other_tens_iter == tn_req->tensor_modes.end()){
    //First time this index is seen: mint a new mode label and record its extent:
    res1.first->second[i] = ++mode_id;
    auto new_mode = tn_req->mode_extents.emplace(std::make_pair(mode_id,tens_dims[i]));
    assert(new_mode.second); //fixed: emplace result was previously ignored (unused variable)
   }else{
    //The connected tensor was processed earlier: reuse the label it assigned to this leg:
    res1.first->second[i] = other_tens_iter->second[other_tens_leg_id];
   }
  }

  if(tens_id == 0){ //output tensor
   tn_req->num_modes_out = tens_rank;
   tn_req->extents_out = res0.first->second.extents.data();
   tn_req->modes_out = res1.first->second.data();
  }else{ //input tensors
   tn_req->num_modes_in[tens_num] = tens_rank;
   tn_req->extents_in[tens_num] = res0.first->second.extents.data();
   tn_req->modes_in[tens_num] = res1.first->second.data();
   ++tens_num;
  }
 }

 //Create the cuTensorNet network descriptor on the first GPU's handle:
 HANDLE_CTN_ERROR(cutensornetCreateNetworkDescriptor(gpu_attr_[0].second.cutn_handle,num_input_tensors,
                  tn_req->num_modes_in,tn_req->extents_in,tn_req->strides_in,tn_req->modes_in,tn_req->alignments_in,
                  tn_req->num_modes_out,tn_req->extents_out,tn_req->strides_out,tn_req->modes_out,tn_req->alignment_out,
                  tn_req->data_type,tn_req->compute_type,&(tn_req->net_descriptor)));

 HANDLE_CUDA_ERROR(cudaStreamCreate(&(tn_req->stream)));
 return;
}
......
......@@ -30,7 +30,7 @@ class Tensor;
namespace exatn {
namespace runtime {
//Returns a non-owning pointer to the tensor body image on the requested
//device (kind, id), optionally reporting its size in bytes; the diff had left
//both the old (const void*) and new (void*) declarations, which is a
//redefinition error — only the post-commit (void*) declaration is kept:
using TensorImplFunc = std::function<void*(const numerics::Tensor &, int, int, std::size_t *)>;
using TensorImplTalshFunc = std::function<std::shared_ptr<talsh::Tensor>(const numerics::Tensor &, int, int)>;
struct TensorNetworkReq;
......
......@@ -33,7 +33,7 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no
if(node_executor){
cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(
[this](const numerics::Tensor & tensor, int device_kind, int device_id, std::size_t * size){
const void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size);
void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size);
return data_ptr;
}
);
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Exatensor
REVISION: 2021/12/27
REVISION: 2021/12/30
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -87,10 +87,10 @@ public:
std::shared_ptr<talsh::Tensor> getLocalTensor(const numerics::Tensor & tensor,
const std::vector<std::pair<DimOffset,DimExtent>> & slice_spec) override;
const void * getTensorImage(const numerics::Tensor & tensor,
int device_kind,
int device_id,
std::size_t * size = nullptr) const override {return nullptr;}
void * getTensorImage(const numerics::Tensor & tensor,
int device_kind,
int device_id,
std::size_t * size = nullptr) const override {return nullptr;}
const std::string name() const override {return "exatensor-node-executor";}
const std::string description() const override {return "ExaTENSOR tensor graph node executor";}
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Talsh
REVISION: 2021/12/27
REVISION: 2021/12/30
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -1459,9 +1459,9 @@ std::shared_ptr<talsh::Tensor> TalshNodeExecutor::getLocalTensor(const numerics:
}
const void * TalshNodeExecutor::getTensorImage(const numerics::Tensor & tensor,
int device_kind, int device_id,
std::size_t * size) const
void * TalshNodeExecutor::getTensorImage(const numerics::Tensor & tensor,
int device_kind, int device_id,
std::size_t * size) const
{
const auto tensor_hash = tensor.getTensorHash();
auto tens_pos = tensors_.find(tensor_hash);
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor: Talsh
REVISION: 2021/12/27
REVISION: 2021/12/30
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -99,10 +99,10 @@ public:
/** Returns a non-owning pointer to a local tensor data image on a given device.
If unsuccessful, returns nullptr. **/
const void * getTensorImage(const numerics::Tensor & tensor, //in: tensor
int device_kind, //in: device kind (implementation specific)
int device_id, //in: device id: [0,1,2,..]
std::size_t * size = nullptr) const override; //out: tensor data image size in bytes
void * getTensorImage(const numerics::Tensor & tensor, //in: tensor
int device_kind, //in: device kind (implementation specific)
int device_id, //in: device id: [0,1,2,..]
std::size_t * size = nullptr) const override; //out: tensor data image size in bytes
/** Finishes tensor operand prefetching for a given tensor operation. **/
bool finishPrefetching(const numerics::TensorOperation & op); //in: tensor operation
......
/** ExaTN:: Tensor Runtime: Tensor graph node executor
REVISION: 2021/12/27
REVISION: 2021/12/30
Copyright (C) 2018-2021 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
......@@ -119,10 +119,10 @@ public:
/** Returns a non-owning pointer to a local tensor data image on a given device.
If unsuccessful, returns nullptr. **/
virtual const void * getTensorImage(const numerics::Tensor & tensor, //in: tensor
int device_kind, //in: device kind (implementation specific)
int device_id, //in: device id: [0,1,2,..]
std::size_t * size = nullptr) const = 0; //out: tensor data image size in bytes
virtual void * getTensorImage(const numerics::Tensor & tensor, //in: tensor
int device_kind, //in: device kind (implementation specific)
int device_id, //in: device id: [0,1,2,..]
std::size_t * size = nullptr) const = 0; //out: tensor data image size in bytes
/** Clones. **/
virtual std::shared_ptr<TensorNodeExecutor> clone() = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment