/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/17

Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/

/** Rationale:
 (a) Numerical server provides basic tensor network processing functionality:
     + Opening/closing TAProL scopes (top scope 0 "GLOBAL" is open automatically);
     + Creation/destruction of named vector spaces and their named subspaces;
     + Registration/retrieval of external data (class BytePacket);
     + Registration/retrieval of external tensor methods (class TensorFunctor);
     + Submission for processing of individual tensor operations, tensor networks
       and tensor network expansions;
     + Higher-level methods for tensor creation, destruction, and operations on them,
       which use symbolic tensor names for tensor identification and processing.
 (b) Processing of individual tensor operations, tensor networks and tensor network expansions
     has asynchronous semantics: Submit TensorOperation/TensorNetwork/TensorExpansion for
     processing, then synchronize on the tensor-result or the accumulator tensor.
     Processing of a tensor operation means evaluating all its output tensor operands.
     Processing of a tensor network means evaluating its output tensor (#0),
     which is automatically initialized to zero before the evaluation.
     Processing of a tensor network expansion means evaluating all constituent
     tensor network components and accumulating them into the accumulator tensor
     with their respective prefactors. Synchronization of processing of a tensor operation,
     tensor network or tensor network expansion means ensuring that the tensor-result,
     either the output tensor or the accumulator tensor, has been fully computed.
 (c) Namespace exatn introduces a number of aliases for types imported from exatn::numerics.
     Importantly, exatn::TensorMethod, which is talsh::TensorFunctor<Identifiable>,
     defines the interface which needs to be implemented by the application in order
     to perform a custom unary transformation/initialization operation on exatn::Tensor.
     This is the only portable way to modify the tensor content in a desired way.
**/
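
/** Usage sketch (illustrative only, not part of the API below): processing is
    asynchronous, so a typical client submits and then synchronizes, e.g., given
    a NumServer instance num_server and a previously constructed TensorNetwork network:
     num_server.submit(network); //returns once the network is scheduled, not computed
     num_server.sync(network);   //blocks until the output tensor of the network is computed **/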

#ifndef EXATN_NUM_SERVER_HPP_
#define EXATN_NUM_SERVER_HPP_

#include "tensor_basic.hpp"
39
#include "space_register.hpp"
40
#include "tensor_range.hpp"
41
#include "tensor.hpp"
42
#include "tensor_composite.hpp"
43
#include "tensor_operation.hpp"
44
#include "tensor_op_factory.hpp"
45
#include "tensor_symbol.hpp"
46
47
48
#include "tensor_network.hpp"
#include "tensor_operator.hpp"
#include "tensor_expansion.hpp"
49
50
#include "network_build_factory.hpp"
#include "contraction_seq_optimizer_factory.hpp"
51

52
53
#include "tensor_runtime.hpp"

54
#include "Identifiable.hpp"
55
56
#include "tensor_method.hpp"
#include "functor_init_val.hpp"
57
#include "functor_init_rnd.hpp"
58
#include "functor_init_dat.hpp"
59
#include "functor_init_delta.hpp"
60
#include "functor_init_proj.hpp"
61
#include "functor_init_file.hpp"
62
#include "functor_scale.hpp"
Dmitry I. Lyakh's avatar
Dmitry I. Lyakh committed
63
#include "functor_maxabs.hpp"
64
65
#include "functor_norm1.hpp"
#include "functor_norm2.hpp"
66
#include "functor_diag_rank.hpp"
67
#include "functor_print.hpp"
68

69
70
#include <iostream>
#include <fstream>
71
#include <memory>
72
#include <vector>
73
#include <string>
74
#include <stack>
75
#include <list>
76
#include <map>
77

78
79
#include "errors.hpp"

using exatn::Identifiable;

namespace exatn{

//Primary numerics:: types exposed to the user:
using numerics::VectorSpace;
using numerics::Subspace;
using numerics::TensorHashType;
using numerics::TensorRange;
using numerics::TensorShape;
using numerics::TensorSignature;
using numerics::TensorLeg;
using numerics::Tensor;
using numerics::TensorComposite;
using numerics::TensorOperation;
using numerics::TensorOpFactory;
using numerics::TensorNetwork;
using numerics::TensorOperator;
using numerics::TensorExpansion;

using numerics::NetworkBuilder;
using numerics::NetworkBuildFactory;

using numerics::ContractionSeqOptimizer;
using numerics::ContractionSeqOptimizerFactory;

using numerics::FunctorInitVal;
using numerics::FunctorInitRnd;
using numerics::FunctorInitDat;
using numerics::FunctorInitDelta;
using numerics::FunctorInitProj;
using numerics::FunctorInitFile;
using numerics::FunctorScale;
using numerics::FunctorMaxAbs;
using numerics::FunctorNorm1;
using numerics::FunctorNorm2;
using numerics::FunctorDiagRank;
using numerics::FunctorPrint;

using TensorMethod = talsh::TensorFunctor<Identifiable>;

/** Returns the closest owner id (process rank) for a given subtensor. **/
unsigned int subtensor_owner_id(unsigned int process_rank,          //in: current process rank
                                unsigned int num_processes,         //in: total number of processes
                                unsigned long long subtensor_id,    //in: id of the required subtensor
                                unsigned long long num_subtensors); //in: total number of subtensors

/** Returns a range of subtensors [begin,end] owned by the specified process. **/
std::pair<unsigned long long, unsigned long long> owned_subtensors(
                                                   unsigned int process_rank,          //in: target process rank
                                                   unsigned int num_processes,         //in: total number of processes
                                                   unsigned long long num_subtensors); //in: total number of subtensors

/** Returns the subtensor replication level (number of replicated subtensors). **/
unsigned int replication_level(const ProcessGroup & process_group, //in: process group
                               std::shared_ptr<Tensor> & tensor);  //in: tensor


//Composite tensor mapper (helper):
class CompositeTensorMapper: public TensorMapper{
public:

#ifdef MPI_ENABLED
 CompositeTensorMapper(const MPICommProxy & communicator,
                       unsigned int current_rank_in_group,
                       unsigned int num_processes_in_group,
                       std::size_t memory_per_process,
                       const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors):
  current_process_rank_(current_rank_in_group), group_num_processes_(num_processes_in_group),
  memory_per_process_(memory_per_process), intra_comm_(communicator), local_tensors_(local_tensors) {}
#else
 CompositeTensorMapper(unsigned int current_rank_in_group,
                       unsigned int num_processes_in_group,
                       std::size_t memory_per_process,
                       const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors):
  current_process_rank_(current_rank_in_group), group_num_processes_(num_processes_in_group),
  memory_per_process_(memory_per_process), local_tensors_(local_tensors) {}
#endif

 virtual ~CompositeTensorMapper() = default;

 /** Returns the closest-to-the-target process rank owning the given subtensor. **/
 virtual unsigned int subtensorOwnerId(unsigned int target_process_rank,                 //in: target process rank
                                       unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                       unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  assert(target_process_rank < group_num_processes_);
  return subtensor_owner_id(target_process_rank,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the closest process rank owning the given subtensor. **/
 virtual unsigned int subtensorOwnerId(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                       unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return subtensor_owner_id(current_process_rank_,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the first process rank owning the given subtensor (first replica). **/
 virtual unsigned int subtensorFirstOwnerId(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                            unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return subtensor_owner_id(0,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the number of replicas of a given subtensor within the process group. **/
 virtual unsigned int subtensorNumReplicas(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                           unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  unsigned int num_replicas = 1;
  if(num_subtensors < group_num_processes_){
   assert(group_num_processes_ % num_subtensors == 0);
   num_replicas = group_num_processes_ / num_subtensors;
  }
  return num_replicas;
 }
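 // (For subtensorNumReplicas: e.g., 8 processes and 4 subtensors give 8/4 = 2 replicas per subtensor.)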

 /** Returns the range of subtensors (by their ids) owned by the given process rank. **/
 virtual std::pair<unsigned long long, unsigned long long> ownedSubtensors(unsigned int process_rank,                        //in: target process rank
                                                                           unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return owned_subtensors(process_rank,group_num_processes_,num_subtensors);
 }

 /** Returns whether or not the given subtensor is owned by the current process. **/
 virtual bool isLocalSubtensor(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                               unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return (subtensorOwnerId(subtensor_id,num_subtensors) == current_process_rank_);
 }

 /** Returns whether or not the given subtensor is owned by the current process. **/
 virtual bool isLocalSubtensor(const Tensor & subtensor) const override //in: subtensor
 {
  return (local_tensors_.find(subtensor.getName()) != local_tensors_.cend());
 }

 /** Returns the amount of memory per process. **/
 virtual std::size_t getMemoryPerProcess() const override {return memory_per_process_;}

 /** Returns the rank of the current process. **/
 virtual unsigned int getProcessRank() const override {return current_process_rank_;}

 /** Returns the total number of processes in the group. **/
 virtual unsigned int getNumProcesses() const override {return group_num_processes_;}

 /** Returns the MPI intra-communicator associated with the group of processes. **/
 virtual const MPICommProxy & getMPICommProxy() const override {return intra_comm_;}

private:

 unsigned int current_process_rank_; //rank of the current process (in some process group)
 unsigned int group_num_processes_;  //total number of processes (in some process group)
 std::size_t memory_per_process_;    //amount of memory (bytes) per process
 MPICommProxy intra_comm_;           //MPI communicator for the process group
 const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors_; //locally stored tensors
};

//Numerical server:
class NumServer final {

public:

#ifdef MPI_ENABLED
 NumServer(const MPICommProxy & communicator,                               //MPI communicator proxy
           const ParamConf & parameters,                                    //runtime configuration parameters
           const std::string & graph_executor_name = "lazy-dag-executor",   //DAG executor kind
           const std::string & node_executor_name = "talsh-node-executor"); //DAG node executor kind
#else
 NumServer(const ParamConf & parameters,                                    //runtime configuration parameters
           const std::string & graph_executor_name = "lazy-dag-executor",   //DAG executor kind
           const std::string & node_executor_name = "talsh-node-executor"); //DAG node executor kind
#endif

 NumServer(const NumServer &) = delete;
 NumServer & operator=(const NumServer &) = delete;
 NumServer(NumServer &&) noexcept = delete;
 NumServer & operator=(NumServer &&) noexcept = delete;
 ~NumServer();

 /** Reconfigures tensor runtime implementation. **/
#ifdef MPI_ENABLED
 void reconfigureTensorRuntime(const MPICommProxy & communicator,
                               const ParamConf & parameters,
                               const std::string & dag_executor_name,
                               const std::string & node_executor_name);
#else
 void reconfigureTensorRuntime(const ParamConf & parameters,
                               const std::string & dag_executor_name,
                               const std::string & node_executor_name);
#endif

 /** Switches the computational backend. **/
 void switchComputationalBackend(const std::string & backend_name);

 /** Resets the tensor contraction sequence optimizer that is
     invoked when evaluating tensor networks. **/
 void resetContrSeqOptimizer(const std::string & optimizer_name, //in: tensor contraction sequence optimizer name
                             bool caching = false);              //whether or not optimized tensor contraction sequence will be cached for later reuse

 /** Activates optimized tensor contraction sequence caching for later reuse. **/
 void activateContrSeqCaching(bool persist = false);

 /** Deactivates optimized tensor contraction sequence caching. **/
 void deactivateContrSeqCaching();

 /** Queries the status of optimized tensor contraction sequence caching. **/
 bool queryContrSeqCaching() const;

 /** Resets the client logging level (0:none). **/
 void resetClientLoggingLevel(int level = 0);

 /** Resets the runtime logging level (0:none). **/
 void resetRuntimeLoggingLevel(int level = 0);

 /** Resets tensor operation execution serialization. **/
 void resetExecutionSerialization(bool serialize,
                                  bool validation_trace = false);

 /** Activates/deactivates dry run (no actual computations). **/
 void activateDryRun(bool dry_run);

 /** Activates mixed-precision fast math operations on all devices (if available). **/
 void activateFastMath();

 /** Returns the Host memory buffer size in bytes provided by the runtime. **/
 std::size_t getMemoryBufferSize() const;

 /** Returns the current memory usage by all allocated tensors.
     Note that the returned value includes buffer fragmentation overhead. **/
 std::size_t getMemoryUsage(std::size_t * free_mem) const;

 /** Returns the current value of the Flop counter. **/
 double getTotalFlopCount() const;

 /** Returns the default process group comprising all MPI processes and their communicator. **/
 const ProcessGroup & getDefaultProcessGroup() const;

 /** Returns the current process group comprising solely the current MPI process and its own self-communicator. **/
 const ProcessGroup & getCurrentProcessGroup() const;

 /** Returns the local rank of the MPI process in a given process group, or -1 if it does not belong to it. **/
 int getProcessRank(const ProcessGroup & process_group) const;

 /** Returns the global rank of the current MPI process in the default process group. **/
 int getProcessRank() const;

 /** Returns the number of MPI processes in a given process group. **/
 int getNumProcesses(const ProcessGroup & process_group) const;

 /** Returns the total number of MPI processes in the default process group. **/
 int getNumProcesses() const;

 /** Returns a composite tensor mapper for a given process group. **/
 std::shared_ptr<TensorMapper> getTensorMapper(const ProcessGroup & process_group) const;

 /** Returns a composite tensor mapper for the default process group (all processes). **/
 std::shared_ptr<TensorMapper> getTensorMapper() const;

 /** Registers an external tensor method. **/
 void registerTensorMethod(const std::string & tag,
                           std::shared_ptr<TensorMethod> method);

 /** Retrieves a registered external tensor method. **/
 std::shared_ptr<TensorMethod> getTensorMethod(const std::string & tag);

 /** Registers an external data packet. **/
 void registerExternalData(const std::string & tag,
                           std::shared_ptr<BytePacket> packet);

 /** Retrieves a registered external data packet. **/
 std::shared_ptr<BytePacket> getExternalData(const std::string & tag);

 /** Opens a new (child) TAProL scope and returns its id. **/
 ScopeId openScope(const std::string & scope_name); //new scope name

 /** Closes the currently open TAProL scope and returns its parental scope id. **/
 ScopeId closeScope();


 /** Creates a named vector space, returns its registered id, and,
     optionally, a non-owning pointer to it. **/
 SpaceId createVectorSpace(const std::string & space_name,            //in: vector space name
                           DimExtent space_dim,                       //in: vector space dimension
                           const VectorSpace ** space_ptr = nullptr); //out: non-owning pointer to the created vector space

 /** Destroys a previously created named vector space. **/
 void destroyVectorSpace(const std::string & space_name); //in: name of the vector space to destroy
 void destroyVectorSpace(SpaceId space_id);               //in: id of the vector space to destroy

 /** Returns a non-owning pointer to a previously registered vector space,
     including the anonymous vector space. **/
 const VectorSpace * getVectorSpace(const std::string & space_name) const;


 /** Creates a named subspace of a named vector space,
     returns its registered id, and, optionally, a non-owning pointer to it. **/
 SubspaceId createSubspace(const std::string & subspace_name,         //in: subspace name
                           const std::string & space_name,            //in: containing vector space name
                           std::pair<DimOffset,DimOffset> bounds,     //in: range of basis vectors defining the created subspace
                           const Subspace ** subspace_ptr = nullptr); //out: non-owning pointer to the created subspace
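
 // Illustrative usage (space/subspace names are hypothetical):
 //  SpaceId space_id = createVectorSpace("space1",100);         //creates a 100-dimensional vector space
 //  SubspaceId sub_id = createSubspace("sub1","space1",{0,49}); //subspace spanned by basis vectors [0:49]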

 /** Destroys a previously created named subspace of a named vector space. **/
 void destroySubspace(const std::string & subspace_name); //in: name of the subspace to destroy
 void destroySubspace(SubspaceId subspace_id); //in: id of the subspace to destroy

 /** Returns a non-owning pointer to a previously registered named subspace
     of a previously registered named vector space. **/
 const Subspace * getSubspace(const std::string & subspace_name) const;

 /** Submits an individual (simple or composite) tensor operation for processing.
     Composite tensor operations require an implementation of the TensorMapper interface. **/
 bool submit(std::shared_ptr<TensorOperation> operation,   //in: tensor operation for numerical evaluation
             std::shared_ptr<TensorMapper> tensor_mapper); //in: tensor mapper (for composite tensor operations only)

 /** Submits a tensor network for processing (evaluating the output tensor-result).
     If the output (result) tensor has not been created yet, it will be created and
     initialized to zero automatically, and later destroyed automatically when no longer needed.
     By default all parallel processes will be processing the tensor network,
     otherwise the desired process subset needs to be explicitly specified. **/
 bool submit(TensorNetwork & network);                    //in: tensor network for numerical evaluation
 bool submit(std::shared_ptr<TensorNetwork> network);     //in: tensor network for numerical evaluation
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             TensorNetwork & network);                    //in: tensor network for numerical evaluation
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             std::shared_ptr<TensorNetwork> network);     //in: tensor network for numerical evaluation

 /** Submits a tensor network expansion for processing (evaluating output tensors of all
     constituent tensor networks and accumulating them into the provided accumulator tensor).
     Synchronization of the tensor expansion evaluation is done via syncing on the accumulator
     tensor. By default all parallel processes will be processing the tensor network,
     otherwise the desired process subset needs to be explicitly specified. **/
 bool submit(TensorExpansion & expansion,                 //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(std::shared_ptr<TensorExpansion> expansion,  //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             TensorExpansion & expansion,                 //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             std::shared_ptr<TensorExpansion> expansion,  //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
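 // Illustrative usage (the accumulator tensor name "AC0" is hypothetical and must have been created):
 //  auto accumulator = getTensor("AC0"); //previously created accumulator tensor
 //  submit(expansion,accumulator);       //asynchronous submission
 //  sync(*accumulator);                  //block until the accumulator tensor is fully computed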

 /** Synchronizes all update operations on a given tensor.
     Setting wait to FALSE only tests for completion.
     If a ProcessGroup is not provided, defaults to the local process. **/
 bool sync(const Tensor & tensor,
           bool wait = true);
 bool sync(const ProcessGroup & process_group,
           const Tensor & tensor,
           bool wait = true);
 /** Synchronizes execution of a specific tensor network.
     Setting wait to FALSE only tests for completion.
     If a ProcessGroup is not provided, defaults to the local process. **/
 bool sync(TensorNetwork & network,
           bool wait = true);
 bool sync(const ProcessGroup & process_group,
           TensorNetwork & network,
           bool wait = true);
 /** Synchronizes execution of all outstanding tensor operations.
     Setting wait to FALSE only tests for completion.
     If a ProcessGroup is not provided, defaults to the local process. **/
 bool sync(bool wait = true);
 bool sync(const ProcessGroup & process_group,
           bool wait = true);

 /** HIGHER-LEVEL WRAPPERS **/

 /** Synchronizes all outstanding update operations on a given tensor.
     If ProcessGroup is not provided, defaults to the local process. **/
 bool sync(const std::string & name, //in: tensor name
           bool wait = true);        //in: wait versus test for completion
 bool sync(const ProcessGroup & process_group,
           const std::string & name, //in: tensor name
           bool wait = true);        //in: wait versus test for completion

 /** Checks whether a given tensor has been allocated storage (created). **/
 bool tensorAllocated(const std::string & name) const; //in: tensor name

 /** Returns a shared pointer to the requested tensor object. **/
 std::shared_ptr<Tensor> getTensor(const std::string & name); //in: tensor name

 /** Returns a reference to the requested tensor object. **/
 Tensor & getTensorRef(const std::string & name); //in: tensor name

 /** Returns the tensor element type. **/
 TensorElementType getTensorElementType(const std::string & name) const; //in: tensor name

 /** Registers a group of tensor dimensions which form an isometry when
     contracted over with the conjugated tensor (see exatn::numerics::Tensor).
     Returns TRUE on success, FALSE on failure. **/
 bool registerTensorIsometry(const std::string & name,                     //in: tensor name
                             const std::vector<unsigned int> & iso_dims);  //in: tensor dimensions forming the isometry
 bool registerTensorIsometry(const std::string & name,                     //in: tensor name
                             const std::vector<unsigned int> & iso_dims0,  //in: tensor dimensions forming the isometry (group 0)
                             const std::vector<unsigned int> & iso_dims1); //in: tensor dimensions forming the isometry (group 1)

 /** Returns TRUE if the calling process is within the existence domain of all given tensors, FALSE otherwise. **/
 template <typename... Args>
 bool withinTensorExistenceDomain(const std::string & tensor_name, Args&&... tensor_names) const //in: tensor names
 {
  if(!withinTensorExistenceDomain(tensor_name)) return false;
  return withinTensorExistenceDomain(std::forward<Args>(tensor_names)...);
 }

 bool withinTensorExistenceDomain(const std::string & tensor_name) const; //in: tensor name
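
 // Illustrative check (tensor names are hypothetical):
 //  bool local = withinTensorExistenceDomain("T1","T2"); //TRUE only if the calling process
 //                                                       //is in both tensors' existence domains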

 /** Returns the process group associated with the given tensors, that is,
     the intersection of the existence domains of the given tensors. Note that
     the existence domains of the given tensors must be properly nested,
      tensorA <= tensorB <= tensorC <= ... <= tensorZ,
     for some ordering of the tensors, otherwise the code will result in
     undefined behavior. It is the user's responsibility to ensure that
     the returned process group is also a subdomain of full presence
     for all participating tensors. **/
 template <typename... Args>
 const ProcessGroup & getTensorProcessGroup(const std::string & tensor_name, Args&&... tensor_names) const //in: tensor names
 {
  const auto & tensor_domain = getTensorProcessGroup(tensor_name);
  const auto & other_tensors_domain = getTensorProcessGroup(std::forward<Args>(tensor_names)...);
  if(tensor_domain.isContainedIn(other_tensors_domain)){
   return tensor_domain;
  }else if(other_tensors_domain.isContainedIn(tensor_domain)){
   return other_tensors_domain;
  }else{
   std::cout << "#ERROR(exatn::getTensorProcessGroup): Tensor operand existence domains must be properly nested: "
             << "Tensor " << tensor_name << " is not properly nested w.r.t. tensors ";
   print_variadic_pack(std::forward<Args>(tensor_names)...); std::cout << std::endl;
   const auto & tensor_domain_ranks = tensor_domain.getProcessRanks();
   const auto & other_tensors_domain_ranks = other_tensors_domain.getProcessRanks();
   std::cout << tensor_name << ":" << std::endl;
   for(const auto & proc_rank: tensor_domain_ranks) std::cout << " " << proc_rank;
   std::cout << std::endl;
   print_variadic_pack(std::forward<Args>(tensor_names)...); std::cout << ":" << std::endl;
   for(const auto & proc_rank: other_tensors_domain_ranks) std::cout << " " << proc_rank;
   std::cout << std::endl;
   assert(false);
  };
  return getDefaultProcessGroup();
 }

 const ProcessGroup & getTensorProcessGroup(const std::string & tensor_name) const; //tensor name

 /** Declares, registers, and actually creates a tensor via the processing backend.
     See numerics::Tensor constructors for different creation options. **/
 template <typename... Args>
 bool createTensor(const std::string & name,       //in: tensor name
                   TensorElementType element_type, //in: tensor element type
                   Args&&... args);                //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const std::string & name,       //in: tensor name
                       TensorElementType element_type, //in: tensor element type
                       Args&&... args);                //in: other arguments for Tensor ctor
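
 // Illustrative usage (a sketch; the tensor names and shapes are arbitrary):
 //  bool created = createTensor("T1",TensorElementType::REAL64,TensorShape{16,32});
 //  bool done = createTensorSync("T2",TensorElementType::COMPLEX64,TensorShape{8,8,8});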

 bool createTensor(std::shared_ptr<Tensor> tensor,  //in: existing declared tensor (can be composite)
                   TensorElementType element_type); //in: tensor element type

 bool createTensorSync(std::shared_ptr<Tensor> tensor,  //in: existing declared tensor (can be composite)
                       TensorElementType element_type); //in: tensor element type

 bool createTensor(const std::string & name,          //in: tensor name
                   const TensorSignature & signature, //in: tensor signature with registered spaces/subspaces
                   TensorElementType element_type);   //in: tensor element type

 template <typename... Args>
 bool createTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                   const std::string & name,           //in: tensor name
                   TensorElementType element_type,     //in: tensor element type
                   Args&&... args);                    //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       const std::string & name,           //in: tensor name
                       TensorElementType element_type,     //in: tensor element type
                       Args&&... args);                    //in: other arguments for Tensor ctor

 bool createTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                   std::shared_ptr<Tensor> tensor,     //in: existing declared tensor (can be composite)
                   TensorElementType element_type);    //in: tensor element type

 bool createTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       std::shared_ptr<Tensor> tensor,     //in: existing declared tensor (can be composite)
                       TensorElementType element_type);    //in: tensor element type

 /** Creates and allocates storage for a composite tensor distributed over a given process group.
     The distribution of tensor blocks is dictated by its split dimensions. **/
 template <typename... Args>
 bool createTensor(const ProcessGroup & process_group,                      //in: chosen group of MPI processes
                   const std::string & name,                                //in: tensor name
                   const std::vector<std::pair<unsigned int,
                                               unsigned int>> & split_dims, //in: split tensor dimensions: pair{Dimension,MaxDepth}
                   TensorElementType element_type,                          //in: tensor element type
                   Args&&... args);                                         //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const ProcessGroup & process_group,                      //in: chosen group of MPI processes
                       const std::string & name,                                //in: tensor name
                       const std::vector<std::pair<unsigned int,
                                                   unsigned int>> & split_dims, //in: split tensor dimensions: pair{Dimension,MaxDepth}
                       TensorElementType element_type,                          //in: tensor element type
                       Args&&... args);                                         //in: other arguments for Tensor ctor

 /** Creates all input tensors in a given tensor network that are still unallocated. **/
 bool createTensors(TensorNetwork & tensor_network,         //inout: tensor network
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(TensorNetwork & tensor_network,     //inout: tensor network
                        TensorElementType element_type);    //in: tensor element type

 bool createTensors(const ProcessGroup & process_group,     //in: chosen group of MPI processes
                    TensorNetwork & tensor_network,         //inout: tensor network
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        TensorNetwork & tensor_network,     //inout: tensor network
                        TensorElementType element_type);    //in: tensor element type

 /** Creates all input tensors in a given tensor network expansion that are still unallocated. **/
 bool createTensors(TensorExpansion & tensor_expansion,     //inout: tensor expansion
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(TensorExpansion & tensor_expansion, //inout: tensor expansion
                        TensorElementType element_type);    //in: tensor element type

 bool createTensors(const ProcessGroup & process_group,     //in: chosen group of MPI processes
                    TensorExpansion & tensor_expansion,     //inout: tensor expansion
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        TensorExpansion & tensor_expansion, //inout: tensor expansion
                        TensorElementType element_type);    //in: tensor element type

 /** Destroys a tensor, including its backend representation. **/
 bool destroyTensor(const std::string & name); //in: tensor name

 bool destroyTensorSync(const std::string & name); //in: tensor name

 /** Destroys all currently allocated tensors in a given tensor network.
     Note that the destroyed tensors could also be present in other tensor networks. **/
 bool destroyTensors(TensorNetwork & tensor_network);     //inout: tensor network

 bool destroyTensorsSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Initializes a tensor to some scalar value. **/
 template<typename NumericType>
 bool initTensor(const std::string & name, //in: tensor name
                 NumericType value);       //in: scalar value

 template<typename NumericType>
 bool initTensorSync(const std::string & name, //in: tensor name
                     NumericType value);       //in: scalar value

 /** Initializes a tensor with externally provided data.
     The vector containing externally provided data assumes
     the column-wise storage in the initialized tensor.  **/
 template<typename NumericType>
 bool initTensorData(const std::string & name,                   //in: tensor name
                     const std::vector<NumericType> & ext_data); //in: vector with externally provided data

 template<typename NumericType>
 bool initTensorDataSync(const std::string & name,                   //in: tensor name
                         const std::vector<NumericType> & ext_data); //in: vector with externally provided data
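
 // Column-wise ordering example: for a 2x2 tensor T, ext_data must be ordered as
 //  {T(0,0), T(1,0), T(0,1), T(1,1)}, that is, the leftmost index runs fastest.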

 /** Initializes a tensor with externally provided data read from a file with format:
     Storage format (string: {dense|list})
     Tensor name
     Tensor shape (space-separated dimension extents)
     Tensor signature (space-separated dimension base offsets)
     Tensor elements:
      Dense format: Numeric values (column-wise order), any number of values per line
      List format: Numeric value and Multi-index in each line **/
 bool initTensorFile(const std::string & name,      //in: tensor name
                     const std::string & filename); //in: file name with tensor data

 bool initTensorFileSync(const std::string & name,      //in: tensor name
                         const std::string & filename); //in: file name with tensor data
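
 // Illustrative file content in the dense format (a hypothetical 2x2 tensor T2
 // with zero base offsets; four elements in column-wise order):
 //  dense
 //  T2
 //  2 2
 //  0 0
 //  1.0 2.0 3.0 4.0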

 /** Initializes a tensor to some random value. **/
 bool initTensorRnd(const std::string & name);     //in: tensor name

 bool initTensorRndSync(const std::string & name); //in: tensor name

 /** Initializes all input tensors of a given tensor network to a random value. **/
 bool initTensorsRnd(TensorNetwork & tensor_network);     //inout: tensor network

 bool initTensorsRndSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Initializes all input tensors in a given tensor network expansion to a random value. **/
 bool initTensorsRnd(TensorExpansion & tensor_expansion);     //inout: tensor network expansion

 bool initTensorsRndSync(TensorExpansion & tensor_expansion); //inout: tensor network expansion

 /** Initializes special tensors present in the tensor network. **/
 bool initTensorsSpecial(TensorNetwork & tensor_network);     //inout: tensor network

 bool initTensorsSpecialSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Computes max-abs norm of a tensor. **/
 bool computeMaxAbsSync(const std::string & name, //in: tensor name
                        double & norm);           //out: tensor norm

 /** Computes 1-norm of a tensor. **/
 bool computeNorm1Sync(const std::string & name, //in: tensor name
                       double & norm);           //out: tensor norm

 /** Computes 2-norm of a tensor. **/
 bool computeNorm2Sync(const std::string & name, //in: tensor name
                       double & norm);           //out: tensor norm

 /** Computes partial 2-norms over a chosen tensor dimension. **/
 bool computePartialNormsSync(const std::string & name,             //in: tensor name
                              unsigned int tensor_dimension,        //in: chosen tensor dimension
                              std::vector<double> & partial_norms); //out: partial 2-norms over the chosen tensor dimension

 /** Computes 2-norms of all tensors in a tensor network. **/
 bool computeNorms2Sync(const TensorNetwork & network,         //in: tensor network
                        std::map<std::string,double> & norms); //out: tensor norms: tensor_name --> norm

 /** Replicates a tensor within the given process group, which defaults to all MPI processes.
     Only the root_process_rank within the given process group is required to have the tensor,
     that is, the tensor will automatically be created in those MPI processes which do not have it.  **/
 bool replicateTensor(const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool replicateTensorSync(const std::string & name,       //in: tensor name
                          int root_process_rank);         //in: local rank of the root process within the given process group

 bool replicateTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool replicateTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name,           //in: tensor name
                          int root_process_rank);             //in: local rank of the root process within the given process group

 /** Shrinks the domain of existence of a given tensor to a single process. **/
 bool dereplicateTensor(const std::string & name,     //in: tensor name
                        int root_process_rank);       //in: local rank of the chosen process

 bool dereplicateTensorSync(const std::string & name, //in: tensor name
                            int root_process_rank);   //in: local rank of the chosen process

 bool dereplicateTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        const std::string & name,           //in: tensor name
                        int root_process_rank);             //in: local rank of the chosen process

 bool dereplicateTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                            const std::string & name,           //in: tensor name
                            int root_process_rank);             //in: local rank of the chosen process

 /** Broadcasts a tensor among all MPI processes within a given process group,
     which defaults to all MPI processes. This function is needed when
     a tensor is updated in an operation submitted to a subset of MPI processes
     such that the excluded MPI processes can receive an updated version of the tensor.
     Note that the tensor must exist in all participating MPI processes. **/
 bool broadcastTensor(const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool broadcastTensorSync(const std::string & name,       //in: tensor name
                          int root_process_rank);         //in: local rank of the root process within the given process group

 bool broadcastTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool broadcastTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name,           //in: tensor name
                          int root_process_rank);             //in: local rank of the root process within the given process group

 /** Performs a global sum reduction on a tensor among all MPI processes within a given
     process group, which defaults to all MPI processes. This function is needed when
     multiple MPI processes compute their local updates to the tensor, thus requiring
     a global sum reduction such that each MPI process will get the final (same) tensor
     value. Note that the tensor must exist in all participating MPI processes. **/
 bool allreduceTensor(const std::string & name);          //in: tensor name

 bool allreduceTensorSync(const std::string & name);      //in: tensor name

 bool allreduceTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name);          //in: tensor name

 bool allreduceTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name);          //in: tensor name

 /** Scales a tensor by a scalar value. **/
 template<typename NumericType>
 bool scaleTensor(const std::string & name, //in: tensor name
                  NumericType value);       //in: scalar value

 template<typename NumericType>
 bool scaleTensorSync(const std::string & name, //in: tensor name
                      NumericType value);       //in: scalar value

 /** Transforms (updates) a tensor according to a user-defined tensor functor. **/
 bool transformTensor(const std::string & name,               //in: tensor name
                      std::shared_ptr<TensorMethod> functor); //in: functor defining the tensor transformation

 bool transformTensorSync(const std::string & name,               //in: tensor name
                          std::shared_ptr<TensorMethod> functor); //in: functor defining the tensor transformation

 bool transformTensor(const std::string & name,              //in: tensor name
                      const std::string & functor_name);     //in: name of the functor defining the tensor transformation

 bool transformTensorSync(const std::string & name,          //in: tensor name
                          const std::string & functor_name); //in: name of the functor defining the tensor transformation

 /** Extracts a slice from a tensor and stores it in another tensor,
     the signature and shape of which determine which slice to extract. **/
 bool extractTensorSlice(const std::string & tensor_name, //in: tensor name
                         const std::string & slice_name); //in: slice name

 bool extractTensorSliceSync(const std::string & tensor_name, //in: tensor name
                             const std::string & slice_name); //in: slice name

 /** Inserts a slice into a tensor. The signature and shape of the slice
     determine the position in the tensor where the slice will be inserted. **/
 bool insertTensorSlice(const std::string & tensor_name, //in: tensor name
                        const std::string & slice_name); //in: slice name

 bool insertTensorSliceSync(const std::string & tensor_name, //in: tensor name
                            const std::string & slice_name); //in: slice name

 /** Assigns one tensor to another congruent one (makes a copy of a tensor).
     If the output tensor with the given name does not exist, it will be created.
     Note that the output tensor must either exist or not exist across all
     participating processes, otherwise the behavior is undefined! **/
 bool copyTensor(const std::string & output_name, //in: output tensor name
                 const std::string & input_name); //in: input tensor name

 bool copyTensorSync(const std::string & output_name, //in: output tensor name
                     const std::string & input_name); //in: input tensor name

 /** Performs tensor addition: tensor0 += tensor1 * alpha **/
 template<typename NumericType>
 bool addTensors(const std::string & addition, //in: symbolic tensor addition specification
                 NumericType alpha);           //in: alpha prefactor

 template<typename NumericType>
 bool addTensorsSync(const std::string & addition, //in: symbolic tensor addition specification
                     NumericType alpha);           //in: alpha prefactor
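
 // Illustrative usage (tensor names are hypothetical):
 //  addTensors("T0(a,b)+=T1(a,b)",1.0); //assumes the symbolic specification syntax T0 += T1 * alpha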

 /** Performs tensor contraction: tensor0 += tensor1 * tensor2 * alpha **/
 template<typename NumericType>
 bool contractTensors(const std::string & contraction, //in: symbolic tensor contraction specification
                      NumericType alpha);              //in: alpha prefactor

 template<typename NumericType>
 bool contractTensorsSync(const std::string & contraction, //in: symbolic tensor contraction specification
                          NumericType alpha);              //in: alpha prefactor
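
 // Illustrative usage (tensor names are hypothetical):
 //  contractTensors("D(a,b)+=L(a,i)*R(i,b)",0.5); //D += L * R * 0.5, summing over the shared index i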

 /** Decomposes a tensor into three tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * S(i,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor,
      R(b,j,a,i,d) is the right SVD factor,
      S(i,j) is the middle SVD factor (the diagonal with singular values).
     Note that the ordering of the contracted indices is not guaranteed! **/
 bool decomposeTensorSVD(const std::string & contraction); //in: three-factor symbolic tensor contraction specification

 bool decomposeTensorSVDSync(const std::string & contraction); //in: three-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor with absorbed singular values,
      R(b,j,a,i,d) is the right SVD factor. **/
 bool decomposeTensorSVDL(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDLSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor,
      R(b,j,a,i,d) is the right SVD factor with absorbed singular values. **/
 bool decomposeTensorSVDR(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDRSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor with absorbed square root of singular values,
      R(b,j,a,i,d) is the right SVD factor with absorbed square root of singular values. **/
 bool decomposeTensorSVDLR(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDLRSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Orthogonalizes a tensor by decomposing it via SVD while discarding
     the middle tensor factor with singular values. The symbolic tensor contraction
     specification specifies the decomposition. It must contain strictly one contracted index! **/
 bool orthogonalizeTensorSVD(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool orthogonalizeTensorSVDSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Orthogonalizes an isometric tensor over its isometric dimensions via the modified Gram-Schmidt procedure. **/
 bool orthogonalizeTensorMGS(const std::string & name); //in: tensor name

 bool orthogonalizeTensorMGSSync(const std::string & name); //in: tensor name

 /** Prints a tensor to the standard output. **/
 bool printTensor(const std::string & name); //in: tensor name

 bool printTensorSync(const std::string & name); //in: tensor name

 /** Prints a tensor to a file. **/
 bool printTensorFile(const std::string & name,      //in: tensor name
                      const std::string & filename); //in: file name

 bool printTensorFileSync(const std::string & name,      //in: tensor name
                          const std::string & filename); //in: file name

 /** Performs a full evaluation of a tensor network based on the symbolic
     specification involving already created tensors (including the output). **/
 bool evaluateTensorNetwork(const std::string & name,           //in: tensor network name
                            const std::string & network);       //in: symbolic tensor network specification
 bool evaluateTensorNetwork(const ProcessGroup & process_group, //in: chosen group of MPI processes
                            const std::string & name,           //in: tensor network name
                            const std::string & network);       //in: symbolic tensor network specification

 bool evaluateTensorNetworkSync(const std::string & name,           //in: tensor network name
                                const std::string & network);       //in: symbolic tensor network specification
 bool evaluateTensorNetworkSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                const std::string & name,           //in: tensor network name
                                const std::string & network);       //in: symbolic tensor network specification
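
 // Illustrative usage (network/tensor names are hypothetical; all tensors,
 // including the output D, must have been created beforehand):
 //  evaluateTensorNetwork("net1","D(a,b)=L(a,i)*S(i,j)*R(j,b)");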

 /** Normalizes a tensor to a given 2-norm. **/
 bool normalizeNorm2Sync(const std::string & name,          //in: tensor name
                         double norm = 1.0,                 //in: desired 2-norm
                         double * original_norm = nullptr); //out: original 2-norm

 /** Normalizes a tensor network expansion to a given 2-norm by rescaling
     all tensor network components by the same factor: Only the tensor
     network expansion coefficients are affected. **/
 bool normalizeNorm2Sync(TensorExpansion & expansion,       //inout: tensor network expansion
                         double norm = 1.0,                 //in: desired 2-norm
                         double * original_norm = nullptr); //out: original 2-norm

 bool normalizeNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                         TensorExpansion & expansion,        //inout: tensor network expansion
                         double norm = 1.0,                  //in: desired 2-norm
                         double * original_norm = nullptr);  //out: original 2-norm

 /** Normalizes all input tensors in a tensor network to a given 2-norm.
     If only_optimizable is TRUE, only optimizable tensors will be normalized. **/
 bool balanceNorm2Sync(TensorNetwork & network,        //inout: tensor network
                       double norm,                    //in: desired 2-norm
                       bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       TensorNetwork & network,            //inout: tensor network
                       double norm,                        //in: desired 2-norm
                       bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Normalizes all input tensors in a tensor network expansion to a given 2-norm.
     If only_optimizable is TRUE, only optimizable tensors will be normalized. **/
 bool balanceNorm2Sync(TensorExpansion & expansion,    //inout: tensor network expansion
                       double norm,                    //in: desired 2-norm
                       bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       TensorExpansion & expansion,        //inout: tensor network expansion
                       double norm,                        //in: desired 2-norm
                       bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Normalizes all input tensors in a tensor network expansion to a given 2-norm
     and rescales tensor network expansion coefficients to normalize the entire
     tensor network expansion to another given 2-norm. If only_optimizable is TRUE,
     only optimizable tensors will be normalized. **/
 bool balanceNormalizeNorm2Sync(TensorExpansion & expansion,    //inout: tensor network expansion
                                double tensor_norm = 1.0,       //in: desired 2-norm of each input tensor
                                double expansion_norm = 1.0,    //in: desired 2-norm of the tensor network expansion
                                bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNormalizeNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                TensorExpansion & expansion,        //inout: tensor network expansion
                                double tensor_norm = 1.0,           //in: desired 2-norm of each input tensor
                                double expansion_norm = 1.0,        //in: desired 2-norm of the tensor network expansion
                                bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Duplicates a given tensor network as a new tensor network copy.
     The name of the tensor network copy will be prepended with an underscore
     whereas all copies of the input tensors will be renamed with their unique (local) hashes. **/
 std::shared_ptr<TensorNetwork> duplicateSync(const TensorNetwork & network); //in: tensor network

 std::shared_ptr<TensorNetwork> duplicateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                              const TensorNetwork & network);     //in: tensor network

 /** Duplicates a given tensor network expansion as a new tensor network expansion copy.
     The name of the tensor network expansion copy will be prepended with an underscore
     whereas all copies of the input tensors will be renamed with their unique (local) hashes. **/
 std::shared_ptr<TensorExpansion> duplicateSync(const TensorExpansion & expansion); //in: tensor expansion

 std::shared_ptr<TensorExpansion> duplicateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                                const TensorExpansion & expansion); //in: tensor expansion

 /** Projects a given tensor network to a chosen slice of its full output tensor. **/
 std::shared_ptr<TensorNetwork> projectSliceSync(const TensorNetwork & network, //in: tensor network
                                                 const Tensor & slice);         //in: desired slice of the output tensor

 std::shared_ptr<TensorNetwork> projectSliceSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                                 const TensorNetwork & network,      //in: tensor network
                                                 const Tensor & slice);              //in: desired slice of the output tensor

 /** Projects a given tensor network expansion to a chosen slice of its full output tensor. **/