Commit 34a15d66 authored by Dmitry I. Lyakh
Browse files

Implementing multi-way parallelism for TensorExpansion evaluation, needs domain resolution ...



Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent f9738392
/** ExaTN::Numerics: General client header (free function API) /** ExaTN::Numerics: General client header (free function API)
REVISION: 2021/08/12 REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -770,26 +770,30 @@ inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group ...@@ -770,26 +770,30 @@ inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group
/** Evaluates a tensor network expansion into the explicitly provided tensor accumulator. **/ /** Evaluates a tensor network expansion into the explicitly provided tensor accumulator. **/
inline bool evaluate(TensorExpansion & expansion, //in: tensor network expansion inline bool evaluate(TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
{return numericalServer->submit(expansion,accumulator);} unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{return numericalServer->submit(expansion,accumulator,parallel_width);}
inline bool evaluateSync(TensorExpansion & expansion, //in: tensor network expansion inline bool evaluateSync(TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{if(!accumulator) return false; {if(!accumulator) return false;
bool success = numericalServer->submit(expansion,accumulator); bool success = numericalServer->submit(expansion,accumulator,parallel_width);
if(success) success = numericalServer->sync(*accumulator); if(success) success = numericalServer->sync(*accumulator);
return success;} return success;}
inline bool evaluate(const ProcessGroup & process_group, //in: chosen group of MPI processes inline bool evaluate(const ProcessGroup & process_group, //in: chosen group of MPI processes
TensorExpansion & expansion, //in: tensor network expansion TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
{return numericalServer->submit(process_group,expansion,accumulator);} unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{return numericalServer->submit(process_group,expansion,accumulator,parallel_width);}
inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
TensorExpansion & expansion, //in: tensor network expansion TensorExpansion & expansion, //in: tensor network expansion
std::shared_ptr<Tensor> accumulator) //inout: tensor accumulator std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator
unsigned int parallel_width = 1) //in: requested number of execution subgroups running in parallel
{if(!accumulator) return false; {if(!accumulator) return false;
bool success = numericalServer->submit(process_group,expansion,accumulator); bool success = numericalServer->submit(process_group,expansion,accumulator,parallel_width);
if(success) success = numericalServer->sync(process_group,*accumulator); if(success) success = numericalServer->sync(process_group,*accumulator);
return success;} return success;}
......
/** ExaTN::Numerics: Numerical server /** ExaTN::Numerics: Numerical server
REVISION: 2021/09/22 REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -866,55 +866,64 @@ bool NumServer::submit(const ProcessGroup & process_group, ...@@ -866,55 +866,64 @@ bool NumServer::submit(const ProcessGroup & process_group,
} }
bool NumServer::submit(TensorExpansion & expansion, bool NumServer::submit(TensorExpansion & expansion,
std::shared_ptr<Tensor> accumulator) std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{ {
return submit(getDefaultProcessGroup(),expansion,accumulator); return submit(getDefaultProcessGroup(),expansion,accumulator,parallel_width);
} }
bool NumServer::submit(std::shared_ptr<TensorExpansion> expansion, bool NumServer::submit(std::shared_ptr<TensorExpansion> expansion,
std::shared_ptr<Tensor> accumulator) std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{ {
return submit(getDefaultProcessGroup(),expansion,accumulator); return submit(getDefaultProcessGroup(),expansion,accumulator,parallel_width);
} }
bool NumServer::submit(const ProcessGroup & process_group, bool NumServer::submit(const ProcessGroup & process_group,
TensorExpansion & expansion, TensorExpansion & expansion,
std::shared_ptr<Tensor> accumulator) std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{ {
if(!process_group.rankIsIn(process_rank_)) return true; //process is not in the group: Do nothing unsigned int local_rank;
if(!process_group.rankIsIn(process_rank_,&local_rank)) return true; //process is not in the group: Do nothing
assert(accumulator); assert(accumulator);
auto tensor_mapper = getTensorMapper(process_group); if(parallel_width <= 1){ //all processes execute all tensor networks one-by-one
std::list<std::shared_ptr<TensorOperation>> accumulations; auto tensor_mapper = getTensorMapper(process_group);
for(auto component = expansion.begin(); component != expansion.end(); ++component){ std::list<std::shared_ptr<TensorOperation>> accumulations;
//Evaluate the tensor network component (compute its output tensor): for(auto component = expansion.begin(); component != expansion.end(); ++component){
auto & network = *(component->network); //Evaluate the tensor network component (compute its output tensor):
auto submitted = submit(process_group,network); if(!submitted) return false; auto & network = *(component->network);
//Create accumulation operation for the scaled computed output tensor: auto submitted = submit(process_group,network); if(!submitted) return false;
bool conjugated; //Create accumulation operation for the scaled computed output tensor:
auto output_tensor = network.getTensor(0,&conjugated); assert(!conjugated); //output tensor cannot be conjugated bool conjugated;
std::shared_ptr<TensorOperation> op = tensor_op_factory_->createTensorOp(TensorOpCode::ADD); auto output_tensor = network.getTensor(0,&conjugated); assert(!conjugated); //output tensor cannot be conjugated
op->setTensorOperand(accumulator); std::shared_ptr<TensorOperation> op = tensor_op_factory_->createTensorOp(TensorOpCode::ADD);
op->setTensorOperand(output_tensor,conjugated); op->setTensorOperand(accumulator);
op->setScalar(0,component->coefficient); op->setTensorOperand(output_tensor,conjugated);
std::string add_pattern; op->setScalar(0,component->coefficient);
auto generated = generate_addition_pattern(accumulator->getRank(),add_pattern,false, std::string add_pattern;
accumulator->getName(),output_tensor->getName()); auto generated = generate_addition_pattern(accumulator->getRank(),add_pattern,false,
assert(generated); accumulator->getName(),output_tensor->getName());
op->setIndexPattern(add_pattern); assert(generated);
accumulations.emplace_back(op); op->setIndexPattern(add_pattern);
} accumulations.emplace_back(op);
//Submit all previously created accumulation operations: }
for(auto & accumulation: accumulations){ //Submit all previously created accumulation operations:
auto submitted = submit(accumulation,tensor_mapper); if(!submitted) return false; for(auto & accumulation: accumulations){
auto submitted = submit(accumulation,tensor_mapper); if(!submitted) return false;
}
}else{ //tensor networks will be distributed among subgroups of processes
std::abort(); //`Finish
} }
return true; return true;
} }
bool NumServer::submit(const ProcessGroup & process_group, bool NumServer::submit(const ProcessGroup & process_group,
std::shared_ptr<TensorExpansion> expansion, std::shared_ptr<TensorExpansion> expansion,
std::shared_ptr<Tensor> accumulator) std::shared_ptr<Tensor> accumulator,
unsigned int parallel_width)
{ {
if(expansion) return submit(process_group,*expansion,accumulator); if(expansion) return submit(process_group,*expansion,accumulator,parallel_width);
return false; return false;
} }
......
/** ExaTN::Numerics: Numerical server /** ExaTN::Numerics: Numerical server
REVISION: 2021/09/24 REVISION: 2021/09/25
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh) Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
...@@ -405,15 +405,19 @@ public: ...@@ -405,15 +405,19 @@ public:
tensor. By default all parallel processes will be processing the tensor network, tensor. By default all parallel processes will be processing the tensor network,
otherwise the desired process subset needs to be explicitly specified. **/ otherwise the desired process subset needs to be explicitly specified. **/
bool submit(TensorExpansion & expansion, //in: tensor expansion for numerical evaluation bool submit(TensorExpansion & expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result) std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation bool submit(std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result) std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes
TensorExpansion & expansion, //in: tensor expansion for numerical evaluation TensorExpansion & expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result) std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes bool submit(const ProcessGroup & process_group, //in: chosen group of MPI processes
std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation std::shared_ptr<TensorExpansion> expansion, //in: tensor expansion for numerical evaluation
std::shared_ptr<Tensor> accumulator); //inout: tensor accumulator (result) std::shared_ptr<Tensor> accumulator, //inout: tensor accumulator (result)
unsigned int parallel_width = 1); //in: requested number of execution subgroups running in parallel
/** Synchronizes all update operations on a given tensor. /** Synchronizes all update operations on a given tensor.
Changing wait to FALSE, only tests for completion. Changing wait to FALSE, only tests for completion.
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "errors.hpp" #include "errors.hpp"
//Test activation: //Test activation:
/*#define EXATN_TEST0 #define EXATN_TEST0
#define EXATN_TEST1 #define EXATN_TEST1
#define EXATN_TEST2 #define EXATN_TEST2
#define EXATN_TEST3 #define EXATN_TEST3
...@@ -44,9 +44,9 @@ ...@@ -44,9 +44,9 @@
#define EXATN_TEST23 #define EXATN_TEST23
#define EXATN_TEST24 #define EXATN_TEST24
#define EXATN_TEST25 #define EXATN_TEST25
#define EXATN_TEST26*/ #define EXATN_TEST26
//#define EXATN_TEST27 //requires input file from source //#define EXATN_TEST27 //requires input file from source
#define EXATN_TEST28 //requires input file from source //#define EXATN_TEST28 //requires input file from source
//#define EXATN_TEST30 //#define EXATN_TEST30
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment