Commit 958e1f51 authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Fixed parallel evaluation of tensor expansions


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 2d6075ec
/** ExaTN::Numerics: Numerical server
REVISION: 2021/10/04
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -1005,6 +1005,7 @@ bool NumServer::submit(const ProcessGroup & process_group,
assert(generated);
accumulation->setIndexPattern(add_pattern);
success = submit(accumulation,local_tensor_mapper); assert(success);
success = sync(*process_subgroup); assert(success);
success = sync(process_group); assert(success);
success = scaleTensor(accumulator->getName(),1.0/static_cast<double>(my_subgroup_size)); assert(success);
success = destroyTensor(local_accumulator->getName()); assert(success);
......@@ -1098,8 +1099,13 @@ bool NumServer::sync(const ProcessGroup & process_group, bool wait)
if(wait){
auto errc = MPI_Barrier(process_group.getMPICommProxy().getRef<MPI_Comm>());
success = success && (errc == MPI_SUCCESS);
if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "]: Globally synchronized all operations" << std::endl << std::flush;
if(success){
if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "]: Globally synchronized all operations" << std::endl << std::flush;
}else{
std::cout << "#ERROR(exatn::sync)[" << process_rank_ << "]: MPI_Barrier error " << errc << std::endl;
assert(false);
}
}
#endif
}
......@@ -1521,12 +1527,18 @@ bool NumServer::initTensorFileSync(const std::string & name,
/** Initializes the named tensor with random values and then broadcasts
    the initialized tensor data from rank 0 of the tensor's process group,
    ensuring every process in the group holds identical random values.
    Returns TRUE on success, FALSE otherwise. **/
bool NumServer::initTensorRnd(const std::string & name)
{
 const auto & process_group = getTensorProcessGroup(name);
 bool success = transformTensor(name,std::shared_ptr<TensorMethod>(new numerics::FunctorInitRnd()));
 //Broadcast from rank 0 so all processes share the same random initialization:
 if(success) success = broadcastTensor(process_group,name,0);
 return success;
}
/** Synchronous variant of initTensorRnd: initializes the named tensor with
    random values and then broadcasts the result from rank 0 of the tensor's
    process group so all processes end up with identical tensor data.
    Returns TRUE on success, FALSE otherwise. **/
bool NumServer::initTensorRndSync(const std::string & name)
{
 const auto & process_group = getTensorProcessGroup(name);
 bool success = transformTensorSync(name,std::shared_ptr<TensorMethod>(new numerics::FunctorInitRnd()));
 //Broadcast from rank 0 so all processes share the same random initialization:
 if(success) success = broadcastTensorSync(process_group,name,0);
 return success;
}
bool NumServer::initTensorsRnd(TensorNetwork & tensor_network)
......
......@@ -14,6 +14,7 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
namespace exatn{
unsigned int TensorNetworkOptimizer::debug{0};
int TensorNetworkOptimizer::focus{-1};
TensorNetworkOptimizer::TensorNetworkOptimizer(std::shared_ptr<TensorOperator> tensor_operator,
......@@ -21,7 +22,7 @@ TensorNetworkOptimizer::TensorNetworkOptimizer(std::shared_ptr<TensorOperator> t
double tolerance):
tensor_operator_(tensor_operator), vector_expansion_(vector_expansion),
max_iterations_(DEFAULT_MAX_ITERATIONS), micro_iterations_(DEFAULT_MICRO_ITERATIONS),
epsilon_(DEFAULT_LEARN_RATE), tolerance_(tolerance)
epsilon_(DEFAULT_LEARN_RATE), tolerance_(tolerance), parallel_(true)
{
if(!vector_expansion_->isKet()){
std::cout << "#ERROR(exatn:TensorNetworkOptimizer): The tensor network vector expansion must be a ket!"
......@@ -84,6 +85,11 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
unsigned int local_rank; //local process rank within the process group
if(!process_group.rankIsIn(exatn::getProcessRank(),&local_rank)) return true; //process is not in the group: Do nothing
const auto num_procs = process_group.getSize();
if(TensorNetworkOptimizer::focus >= 0){
if(getProcessRank() != TensorNetworkOptimizer::focus) TensorNetworkOptimizer::debug = 0;
}
//Balance-normalize the tensor network vector expansion:
//bool success = balanceNormalizeNorm2Sync(*vector_expansion_,1.0,1.0,true); assert(success);
......@@ -204,14 +210,14 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
for(unsigned int micro_iteration = 0; micro_iteration < micro_iterations_; ++micro_iteration){
//Normalize the optimized tensor w.r.t. metrics:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,metrics_expectation,scalar_norm); assert(done);
done = evaluateSync(process_group,metrics_expectation,scalar_norm,num_procs); assert(done);
double tens_norm = 0.0;
done = computeNorm1Sync("_scalar_norm",tens_norm); assert(done);
tens_norm = std::sqrt(tens_norm);
done = scaleTensorSync(environment.tensor->getName(),1.0/tens_norm); assert(done); //`Only works with no repeated tensors
//Compute the operator expectation value w.r.t. the optimized tensor:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,operator_expectation,scalar_norm); assert(done);
done = evaluateSync(process_group,operator_expectation,scalar_norm,num_procs); assert(done);
std::complex<double> expect_val{0.0,0.0};
switch(scalar_norm->getElementType()){
case TensorElementType::REAL32:
......@@ -245,7 +251,7 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
//Initialize the gradient tensor to zero:
done = initTensorSync(environment.gradient->getName(),0.0); assert(done);
//Evaluate the gradient tensor expansion:
done = evaluateSync(process_group,environment.gradient_expansion,environment.gradient); assert(done);
done = evaluateSync(process_group,environment.gradient_expansion,environment.gradient,num_procs); assert(done);
//Compute the norm of the gradient tensor:
double grad_norm = 0.0;
done = computeNorm2Sync(environment.gradient->getName(),grad_norm); assert(done);
......@@ -254,14 +260,14 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
//Compute the convergence criterion:
double denom = 0.0;
done = initTensorSync(environment.gradient_aux->getName(),0.0); assert(done);
done = evaluateSync(process_group,environment.operator_gradient,environment.gradient_aux); assert(done);
done = evaluateSync(process_group,environment.operator_gradient,environment.gradient_aux,num_procs); assert(done);
tens_norm = 0.0;
done = computeNorm2Sync(environment.gradient_aux->getName(),tens_norm); assert(done);
if(TensorNetworkOptimizer::debug > 1) std::cout << environment.tensor->getName()
<< ": |H|x> 2-norm = " << tens_norm;
denom += tens_norm;
done = initTensorSync(environment.gradient_aux->getName(),0.0); assert(done);
done = evaluateSync(process_group,environment.metrics_gradient,environment.gradient_aux); assert(done);
done = evaluateSync(process_group,environment.metrics_gradient,environment.gradient_aux,num_procs); assert(done);
tens_norm = 0.0;
done = computeNorm2Sync(environment.gradient_aux->getName(),tens_norm); assert(done);
if(TensorNetworkOptimizer::debug > 1) std::cout << "; |S|x> 2-norm = " << tens_norm
......@@ -285,7 +291,7 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
environment.hessian_expansion.printCoefficients();
}
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,environment.hessian_expansion,scalar_norm); assert(done);
done = evaluateSync(process_group,environment.hessian_expansion,scalar_norm,num_procs); assert(done);
denom = 0.0;
switch(scalar_norm->getElementType()){
case TensorElementType::REAL32:
......@@ -323,7 +329,7 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
if(NORMALIZE_WITH_METRICS){
//Normalize the optimized tensor w.r.t. metrics:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,metrics_expectation,scalar_norm); assert(done);
done = evaluateSync(process_group,metrics_expectation,scalar_norm,num_procs); assert(done);
tens_norm = 0.0;
done = computeNorm1Sync("_scalar_norm",tens_norm); assert(done);
tens_norm = std::sqrt(tens_norm);
......@@ -367,9 +373,17 @@ bool TensorNetworkOptimizer::optimize_sd(const ProcessGroup & process_group)
}
void TensorNetworkOptimizer::resetDebugLevel(unsigned int level)
void TensorNetworkOptimizer::enableParallelization(bool parallel)
{
parallel_ = parallel;
return;
}
/** Resets the debug verbosity level and, optionally, restricts debug
    output to a single process rank (focus_process = -1 means all). **/
void TensorNetworkOptimizer::resetDebugLevel(unsigned int level, int focus_process)
{
 //Both members are class-wide (static), affecting all optimizer instances:
 TensorNetworkOptimizer::focus = focus_process;
 TensorNetworkOptimizer::debug = level;
 return;
}
......
......@@ -30,6 +30,7 @@ class TensorNetworkOptimizer{
public:
static unsigned int debug;
static int focus;
static constexpr const double DEFAULT_TOLERANCE = 1e-4;
static constexpr const double DEFAULT_LEARN_RATE = 0.5;
......@@ -65,7 +66,11 @@ public:
/** Returns the optimized tensor network expansion forming the optimal bra/ket vectors. **/
std::shared_ptr<TensorExpansion> getSolution() const;
static void resetDebugLevel(unsigned int level = 0);
/** Enables/disables coarse-grain parallelization over tensor networks. **/
void enableParallelization(bool parallel = true);
static void resetDebugLevel(unsigned int level = 0, //in: debug level
int focus_process = -1); //in: process to focus on (-1: all)
protected:
......@@ -91,6 +96,7 @@ private:
unsigned int micro_iterations_; //number of microiterations per optimized tensor
double epsilon_; //learning rate for the gradient descent based tensor update
double tolerance_; //numerical convergence tolerance (for the gradient)
bool parallel_; //enables/disables coarse-grain parallelization over tensor networks
std::vector<Environment> environments_; //optimization environments for each optimizable tensor
};
......
......@@ -15,13 +15,14 @@ Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
namespace exatn{
unsigned int TensorNetworkReconstructor::debug{0};
int TensorNetworkReconstructor::focus{-1};
TensorNetworkReconstructor::TensorNetworkReconstructor(std::shared_ptr<TensorExpansion> expansion,
std::shared_ptr<TensorExpansion> approximant,
double tolerance):
expansion_(expansion), approximant_(approximant),
max_iterations_(DEFAULT_MAX_ITERATIONS), epsilon_(DEFAULT_LEARN_RATE), tolerance_(tolerance),
max_iterations_(DEFAULT_MAX_ITERATIONS), epsilon_(DEFAULT_LEARN_RATE), tolerance_(tolerance), parallel_(true),
input_norm_(0.0), output_norm_(0.0), residual_norm_(0.0), fidelity_(0.0)
{
if(!expansion_->isKet()){
......@@ -125,10 +126,15 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
{
unsigned int local_rank; //local process rank within the process group
if(!process_group.rankIsIn(exatn::getProcessRank(),&local_rank)) return true; //process is not in the group: Do nothing
const auto num_procs = process_group.getSize();
assert(residual_norm != nullptr);
assert(fidelity != nullptr);
if(TensorNetworkReconstructor::focus >= 0){
if(getProcessRank() != TensorNetworkReconstructor::focus) TensorNetworkReconstructor::debug = 0;
}
input_norm_ = 0.0;
output_norm_ = 0.0;
residual_norm_ = 0.0;
......@@ -235,7 +241,7 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
auto scalar_norm = makeSharedTensor("_scalar_norm");
bool done = createTensorSync(scalar_norm,environments_[0].tensor->getElementType()); assert(done);
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,input_norm,scalar_norm); assert(done);
done = evaluateSync(process_group,input_norm,scalar_norm,num_procs); assert(done);
input_norm_ = 0.0;
done = computeNorm1Sync("_scalar_norm",input_norm_); assert(done);
input_norm_ = std::sqrt(input_norm_);
......@@ -244,13 +250,13 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
while(overlap_abs <= DEFAULT_MIN_INITIAL_OVERLAP){
//Compute the approximant norm:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm,num_procs); assert(done);
output_norm_ = 0.0;
done = computeNorm1Sync("_scalar_norm",output_norm_); assert(done);
output_norm_ = std::sqrt(output_norm_);
//Compute the direct absolute overlap with the approximant:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm,num_procs); assert(done);
overlap_abs = 0.0;
done = computeNorm1Sync("_scalar_norm",overlap_abs); assert(done);
overlap_abs /= (output_norm_ * input_norm_);
......@@ -291,7 +297,7 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
//Initialize the gradient tensor to zero:
done = initTensorSync(environment.gradient->getName(),0.0); assert(done);
//Evaluate the gradient tensor expansion:
done = evaluateSync(process_group,environment.gradient_expansion,environment.gradient); assert(done);
done = evaluateSync(process_group,environment.gradient_expansion,environment.gradient,num_procs); assert(done);
//Compute the norm of the gradient tensor:
double grad_norm = 0.0;
done = computeNorm2Sync(environment.gradient->getName(),grad_norm); assert(done);
......@@ -304,7 +310,7 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
//Update the optimizable tensor using the computed gradient:
//Compute the optimal step size:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,environment.hessian_expansion,scalar_norm); assert(done);
done = evaluateSync(process_group,environment.hessian_expansion,scalar_norm,num_procs); assert(done);
double hess_grad = 0.0;
done = computeNorm1Sync("_scalar_norm",hess_grad); assert(done);
if(hess_grad > 0.0){
......@@ -331,7 +337,7 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
}
//Compute the residual norm and check convergence:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,residual,scalar_norm); assert(done);
done = evaluateSync(process_group,residual,scalar_norm,num_procs); assert(done);
residual_norm_ = 0.0;
done = computeNorm1Sync("_scalar_norm",residual_norm_); assert(done);
residual_norm_ = std::sqrt(residual_norm_);
......@@ -341,13 +347,13 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
}
//Compute the approximant norm:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm,num_procs); assert(done);
output_norm_ = 0.0;
done = computeNorm1Sync("_scalar_norm",output_norm_); assert(done);
output_norm_ = std::sqrt(output_norm_);
//Compute the direct overlap:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm,num_procs); assert(done);
overlap_abs = 0.0;
done = computeNorm1Sync("_scalar_norm",overlap_abs); assert(done);
overlap_abs = overlap_abs / (output_norm_ * input_norm_);
......@@ -374,7 +380,7 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
}*/
//Compute the approximant norm:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm); assert(done);
done = evaluateSync(process_group,normalization,scalar_norm,num_procs); assert(done);
output_norm_ = 0.0;
done = computeNorm1Sync("_scalar_norm",output_norm_); assert(done);
output_norm_ = std::sqrt(output_norm_);
......@@ -383,13 +389,13 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
<< output_norm_ << std::endl;
//Compute final approximation fidelity and overlap:
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,overlap_conj,scalar_norm); assert(done);
done = evaluateSync(process_group,overlap_conj,scalar_norm,num_procs); assert(done);
overlap_abs = 0.0;
done = computeNorm1Sync("_scalar_norm",overlap_abs); assert(done);
if(TensorNetworkReconstructor::debug > 0)
std::cout << "#DEBUG(exatn::TensorNetworkReconstructor): Conjugated overlap = " << overlap_abs << std::endl;
done = initTensorSync("_scalar_norm",0.0); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm); assert(done);
done = evaluateSync(process_group,overlap,scalar_norm,num_procs); assert(done);
overlap_abs = 0.0;
done = computeNorm1Sync("_scalar_norm",overlap_abs); assert(done);
fidelity_ = std::pow(overlap_abs / (output_norm_ * input_norm_), 2.0);
......@@ -426,9 +432,17 @@ bool TensorNetworkReconstructor::reconstruct_sd(const ProcessGroup & process_gro
}
void TensorNetworkReconstructor::resetDebugLevel(unsigned int level)
void TensorNetworkReconstructor::enableParallelization(bool parallel)
{
parallel_ = parallel;
return;
}
/** Resets the debug verbosity level and, optionally, restricts debug
    output to a single process rank (focus_process = -1 means all). **/
void TensorNetworkReconstructor::resetDebugLevel(unsigned int level, int focus_process)
{
 //Both members are class-wide (static), affecting all reconstructor instances:
 TensorNetworkReconstructor::focus = focus_process;
 TensorNetworkReconstructor::debug = level;
 return;
}
......
......@@ -32,6 +32,7 @@ class TensorNetworkReconstructor{
public:
static unsigned int debug;
static int focus;
static constexpr const double DEFAULT_TOLERANCE = 1e-5;
static constexpr const double DEFAULT_LEARN_RATE = 0.5;
......@@ -79,7 +80,11 @@ public:
std::shared_ptr<TensorExpansion> getSolution(double * residual_norm, //out: 2-norm of the residual tensor (error)
double * fidelity) const; //out: squared normalized overlap (fidelity)
static void resetDebugLevel(unsigned int level = 0);
/** Enables/disables coarse-grain parallelization over tensor networks. **/
void enableParallelization(bool parallel = true);
static void resetDebugLevel(unsigned int level = 0, //in: debug level
int focus_process = -1); //in: process to focus on (-1: all)
protected:
......@@ -109,6 +114,7 @@ private:
unsigned int max_iterations_; //max number of macro-iterations
double epsilon_; //learning rate for the gradient descent based tensor update
double tolerance_; //numerical reconstruction convergence tolerance (for the gradient)
bool parallel_; //enables/disables coarse-grain parallelization over tensor networks
double input_norm_; //2-norm of the input tensor expansion
double output_norm_; //2-norm of the approximant tensor expansion
......
......@@ -3129,7 +3129,7 @@ TEST(NumServerTester, MCVQEHamiltonian) {
bool success = true;
const int num_sites = 8;
const int bond_dim_lim = 4;
const int bond_dim_lim = 1;
const int max_bond_dim = std::min(static_cast<int>(std::pow(2,num_sites/2)),bond_dim_lim);
//Read the Hamiltonian in spin representation:
......@@ -3150,13 +3150,22 @@ TEST(NumServerTester, MCVQEHamiltonian) {
//Allocate/initialize tensors in the tensor network ansatz:
success = exatn::createTensorsSync(*ansatz_net,TENS_ELEM_TYPE); assert(success);
success = exatn::initTensorsRndSync(*ansatz_net); assert(success);
/*for(auto tens = ansatz_net->begin(); tens != ansatz_net->end(); ++tens){
if(tens->first == 0){
success = exatn::initTensorSync(tens->second.getName(),0.0); assert(success);
}else{
success = exatn::initTensorSync(tens->second.getName(),1e-2); assert(success);
}
}*/
//success = exatn::balanceNormalizeNorm2Sync(*ansatz,1.0,1.0,true); assert(success);
//Perform ground state optimization on a tensor network manifold:
{
std::cout << "Ground state optimization on a tensor network manifold:" << std::endl;
exatn::TensorNetworkOptimizer::resetDebugLevel(1);
exatn::TensorNetworkOptimizer optimizer(hamiltonian_operator,ansatz,5e-4);
exatn::TensorNetworkOptimizer::resetDebugLevel(1,0);
exatn::TensorNetworkOptimizer optimizer(hamiltonian_operator,ansatz,1e-4);
optimizer.enableParallelization(true);
//optimizer.resetMaxIterations(50);
//optimizer.resetMicroIterations(1);
bool converged = optimizer.optimize();
success = exatn::sync(); assert(success);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment