Commit 34a15d66 authored by Dmitry I. Lyakh
Browse files

Implementing multi-way parallelism for TensorExpansion evaluation, needs domain resolution ...


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent f9738392
/** ExaTN::Numerics: General client header (free function API)
REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -770,26 +770,30 @@ inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group
/** Evaluates a tensor network expansion into the explicitly provided tensor accumulator. **/
inline bool evaluate(TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator
{return numericalServer->submit(expansion,accumulator);}
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{return numericalServer->submit(expansion,accumulator,parallel_width);}
inline bool evaluateSync(TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{if(!accumulator) return false;
bool success = numericalServer->submit(expansion,accumulator);
bool success = numericalServer->submit(expansion,accumulator,parallel_width);
if(success) success = numericalServer->sync(*accumulator);
return success;}
/** Evaluates a tensor network expansion into the explicitly provided tensor accumulator
    within the chosen group of MPI processes (asynchronous call). **/
inline bool evaluate(const ProcessGroup & process_group,     //in: chosen group of MPI processes
                     TensorExpansion & expansion,            //in: tensor network expansion
                     std::shared_ptr<Tensor> accumulator,    //inout: tensor accumulator
                     unsigned int parallel_width = 1)        //in: requested number of execution subgroups running in parallel
 {return numericalServer->submit(process_group,expansion,accumulator,parallel_width);}
inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{if(!accumulator) return false;
bool success = numericalServer->submit(process_group,expansion,accumulator);
bool success = numericalServer->submit(process_group,expansion,accumulator,parallel_width);
if(success) success = numericalServer->sync(process_group,*accumulator);
return success;}
......
/** ExaTN::Numerics: Numerical server
REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -866,55 +866,64 @@ bool NumServer::submit(const ProcessGroup & process_group,
}
bool NumServer::submit(TensorExpansion & expansion,
std::shared_ptr<Tensor> accumulator)
std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{
return submit(getDefaultProcessGroup(),expansion,accumulator);
return submit(getDefaultProcessGroup(),expansion,accumulator,parallel_width);
}
bool NumServer::submit(std::shared_ptr<TensorExpansion> expansion,
std::shared_ptr<Tensor> accumulator)
std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{
return submit(getDefaultProcessGroup(),expansion,accumulator);
return submit(getDefaultProcessGroup(),expansion,accumulator,parallel_width);
}
bool NumServer::submit(const ProcessGroup & process_group,
TensorExpansion & expansion,
std::shared_ptr<Tensor> accumulator)
std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{
if(!process_group.rankIsIn(process_rank_)) return true; //process is not in the group: Do nothing
unsigned int local_rank;
if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing
assert(accumulator);
auto tensor_mapper = getTensorMapper(process_group);
std::list<std::shared_ptr<TensorOperation>> accumulations;
for(auto component = expansion.begin(); component != expansion.end(); ++component){
//Evaluate the tensor network component (compute its output tensor):
auto & network = *(component->network);
auto submitted = submit(process_group,network); if(!submitted) return false;
//Create accumulation operation for the scaled computed output tensor:
bool conjugated;
auto output_tensor = network.getTensor(0,&conjugated); assert(!conjugated); //output tensor cannot be conjugated
std::shared_ptr<TensorOperation> op = tensor_op_factory_->createTensorOp(TensorOpCode::ADD);
op->setTensorOperand(accumulator);
op->setTensorOperand(output_tensor,conjugated);
op->setScalar(0,component->coefficient);
std::string add_pattern;
auto generated = generate_addition_pattern(accumulator->getRank(),add_pattern,false,
accumulator->getName(),output_tensor->getName());
assert(generated);
op->setIndexPattern(add_pattern);
accumulations.emplace_back(op);
}
//Submit all previously created accumulation operations:
for(auto & accumulation: accumulations){
auto submitted = submit(accumulation,tensor_mapper); if(!submitted) return false;
if(parallel_width <= 1){ //all processes execute all tensor networks one-by-one
auto tensor_mapper = getTensorMapper(process_group);
std::list<std::shared_ptr<TensorOperation>> accumulations;
for(auto component = expansion.begin(); component != expansion.end(); ++component){
//Evaluate the tensor network component (compute its output tensor):
auto & network = *(component->network);
auto submitted = submit(process_group,network); if(!submitted) return false;
//Create accumulation operation for the scaled computed output tensor:
bool conjugated;
auto output_tensor = network.getTensor(0,&conjugated); assert(!conjugated); //output tensor cannot be conjugated
std::shared_ptr<TensorOperation> op = tensor_op_factory_->createTensorOp(TensorOpCode::ADD);
op->setTensorOperand(accumulator);
op->setTensorOperand(output_tensor,conjugated);
op->setScalar(0,component->coefficient);
std::string add_pattern;
auto generated = generate_addition_pattern(accumulator->getRank(),add_pattern,false,
accumulator->getName(),output_tensor->getName());
assert(generated);
op->setIndexPattern(add_pattern);
accumulations.emplace_back(op);
}
//Submit all previously created accumulation operations:
for(auto & accumulation: accumulations){
auto submitted = submit(accumulation,tensor_mapper); if(!submitted) return false;
}
}else{ //tensor networks will be distributed among subgroups of processes
std::abort(); //`Finish
}
return true;
}
bool NumServer::submit(const ProcessGroup & process_group,
std::shared_ptr<TensorExpansion> expansion,
std::shared_ptr<Tensor> accumulator)
std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{
if(expansion) return submit(process_group,*expansion,accumulator);
if(expansion) return submit(process_group,*expansion,accumulator,parallel_width);
return false;
}
......
/** ExaTN::Numerics: Numerical server
REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -405,15 +405,19 @@ public:
tensor. By default all parallel processes will be processing the tensor network,
otherwise the desired process subset needs to be explicitly specified. **/
bool submit(TensorExpansion & expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result)
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result)
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes
TensorExpansion & expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result)
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes
std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result)
std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
/** Synchronizes all update operations on a given tensor.
Changing wait to FALSE, only tests for completion.
......
......@@ -18,7 +18,7 @@
#include "errors.hpp"
//Test activation: each enabled EXATN_TESTn macro activates the corresponding test case
#define EXATN_TEST0
#define EXATN_TEST1
#define EXATN_TEST2
#define EXATN_TEST3
......@@ -44,9 +44,9 @@
#define EXATN_TEST23
#define EXATN_TEST24
#define EXATN_TEST25
#define EXATN_TEST26
//#define EXATN_TEST27 //requires input file from source
//#define EXATN_TEST28 //requires input file from source
//#define EXATN_TEST30
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment