/** ExaTN::Numerics: Numerical server
REVISION: 2021/12/22

Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/

/** Rationale:
 (a) Numerical server provides basic tensor network processing functionality:
     + Opening/closing TAProL scopes (top scope 0 "GLOBAL" is open automatically);
     + Creation/destruction of named vector spaces and their named subspaces;
     + Registration/retrieval of external data (class BytePacket);
     + Registration/retrieval of external tensor methods (class TensorFunctor);
     + Submission for processing of individual tensor operations, tensor networks
       and tensor network expansions;
     + Higher-level methods for tensor creation, destruction, and operations on them,
       which use symbolic tensor names for tensor identification and processing.
 (b) Processing of individual tensor operations, tensor networks and tensor network expansions
     has asynchronous semantics: Submit a TensorOperation/TensorNetwork/TensorExpansion for
     processing, then synchronize on the tensor-result or the accumulator tensor.
     Processing of a tensor operation means evaluating all its output tensor operands.
     Processing of a tensor network means evaluating its output tensor (#0),
     which is automatically initialized to zero before the evaluation.
     Processing of a tensor network expansion means evaluating all constituent
     tensor network components and accumulating them into the accumulator tensor
     with their respective prefactors. Synchronization of processing of a tensor operation,
     tensor network or tensor network expansion means ensuring that the tensor-result,
     either the output tensor or the accumulator tensor, has been fully computed.
 (c) Namespace exatn introduces a number of aliases for types imported from exatn::numerics.
     Importantly, exatn::TensorMethod, which is talsh::TensorFunctor<Identifiable>,
     defines the interface which needs to be implemented by the application in order
     to perform a custom unary transformation/initialization operation on exatn::Tensor.
     This is the only portable way to modify the tensor content in a desired way.
**/
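
/** Usage sketch (illustrative only, not part of the original header): the
    asynchronous semantics described in (b) above, assuming a live NumServer
    instance "srv" and previously created tensors T0, T1, T2:

     bool ok = srv.contractTensors("T0(a,c)+=T1(a,b)*T2(b,c)",1.0); //asynchronous submission
     ok = ok && srv.sync("T0");                                     //ensures T0 has been fully computed
**/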

#ifndef EXATN_NUM_SERVER_HPP_
#define EXATN_NUM_SERVER_HPP_

#include "tensor_basic.hpp"
#include "space_register.hpp"
#include "tensor_range.hpp"
#include "tensor.hpp"
#include "tensor_composite.hpp"
#include "tensor_operation.hpp"
#include "tensor_op_factory.hpp"
#include "tensor_symbol.hpp"

#include "tensor_network.hpp"
#include "tensor_operator.hpp"
#include "tensor_expansion.hpp"

#include "network_build_factory.hpp"
#include "contraction_seq_optimizer_factory.hpp"

#include "tensor_runtime.hpp"

#include "Identifiable.hpp"

#include "tensor_method.hpp"
#include "functor_init_val.hpp"
#include "functor_init_rnd.hpp"
#include "functor_init_dat.hpp"
#include "functor_init_delta.hpp"
#include "functor_init_proj.hpp"
#include "functor_init_file.hpp"
#include "functor_scale.hpp"
#include "functor_maxabs.hpp"
#include "functor_norm1.hpp"
#include "functor_norm2.hpp"
#include "functor_diag_rank.hpp"
#include "functor_print.hpp"

#include <iostream>
#include <fstream>
#include <memory>
#include <vector>
#include <string>
#include <stack>
#include <list>
#include <map>
#include <unordered_map> //std::unordered_map is used by CompositeTensorMapper below
#include <utility>       //std::pair is used in several declarations below

#include "errors.hpp"

using exatn::Identifiable;

namespace exatn{

//Primary numerics:: types exposed to the user:
using numerics::VectorSpace;
using numerics::Subspace;
using numerics::TensorHashType;
using numerics::TensorRange;
using numerics::TensorShape;
using numerics::TensorSignature;
using numerics::TensorLeg;
using numerics::Tensor;
using numerics::TensorComposite;
using numerics::TensorOperation;
using numerics::TensorOpFactory;
using numerics::TensorNetwork;
using numerics::TensorOperator;
using numerics::TensorExpansion;

using numerics::NetworkBuilder;
using numerics::NetworkBuildFactory;

using numerics::ContractionSeqOptimizer;
using numerics::ContractionSeqOptimizerFactory;

using numerics::FunctorInitVal;
using numerics::FunctorInitRnd;
using numerics::FunctorInitDat;
using numerics::FunctorInitDelta;
using numerics::FunctorInitProj;
using numerics::FunctorInitFile;
using numerics::FunctorScale;
using numerics::FunctorMaxAbs;
using numerics::FunctorNorm1;
using numerics::FunctorNorm2;
using numerics::FunctorDiagRank;
using numerics::FunctorPrint;

using TensorMethod = talsh::TensorFunctor<Identifiable>;

/** Returns the closest owner id (process rank) for a given subtensor. **/
unsigned int subtensor_owner_id(unsigned int process_rank,          //in: current process rank
                                unsigned int num_processes,         //in: total number of processes
                                unsigned long long subtensor_id,    //in: id of the required subtensor
                                unsigned long long num_subtensors); //in: total number of subtensors

/** Returns a range of subtensors [begin,end] owned by the specified process. **/
std::pair<unsigned long long, unsigned long long> owned_subtensors(
                                                   unsigned int process_rank,          //in: target process rank
                                                   unsigned int num_processes,         //in: total number of processes
                                                   unsigned long long num_subtensors); //in: total number of subtensors

/** Returns the subtensor replication level (number of replicated subtensors). **/
unsigned int replication_level(const ProcessGroup & process_group, //in: process group
                               std::shared_ptr<Tensor> & tensor);  //in: tensor


//Composite tensor mapper (helper):
class CompositeTensorMapper: public TensorMapper{
public:

#ifdef MPI_ENABLED
 CompositeTensorMapper(const MPICommProxy & communicator,
                       unsigned int current_rank_in_group,
                       unsigned int num_processes_in_group,
                       std::size_t memory_per_process,
                       const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors):
  current_process_rank_(current_rank_in_group), group_num_processes_(num_processes_in_group),
  memory_per_process_(memory_per_process), intra_comm_(communicator), local_tensors_(local_tensors) {}
#else
 CompositeTensorMapper(unsigned int current_rank_in_group,
                       unsigned int num_processes_in_group,
                       std::size_t memory_per_process,
                       const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors):
  current_process_rank_(current_rank_in_group), group_num_processes_(num_processes_in_group),
  memory_per_process_(memory_per_process), local_tensors_(local_tensors) {}
#endif

 virtual ~CompositeTensorMapper() = default;

 /** Returns the closest-to-the-target process rank owning the given subtensor. **/
 virtual unsigned int subtensorOwnerId(unsigned int target_process_rank,                 //in: target process rank
                                       unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                       unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  assert(target_process_rank < group_num_processes_);
  return subtensor_owner_id(target_process_rank,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the closest process rank owning the given subtensor. **/
 virtual unsigned int subtensorOwnerId(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                       unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return subtensor_owner_id(current_process_rank_,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the first process rank owning the given subtensor (first replica). **/
 virtual unsigned int subtensorFirstOwnerId(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                            unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return subtensor_owner_id(0,group_num_processes_,subtensor_id,num_subtensors);
 }

 /** Returns the number of replicas of a given subtensor within the process group. **/
 virtual unsigned int subtensorNumReplicas(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                                           unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  unsigned int num_replicas = 1;
  if(num_subtensors < group_num_processes_){
   assert(group_num_processes_ % num_subtensors == 0);
   num_replicas = group_num_processes_ / num_subtensors;
  }
  return num_replicas;
 }
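
 /** Illustration of the replication rule implemented above: with 4 subtensors
     distributed over a group of 8 processes, each subtensor is stored in
     8/4 = 2 replicas; if the number of subtensors is greater than or equal to
     the group size, each subtensor has exactly one replica. **/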

 /** Returns the range of subtensors (by their ids) owned by the given process rank. **/
 virtual std::pair<unsigned long long, unsigned long long> ownedSubtensors(unsigned int process_rank,                        //in: target process rank
                                                                           unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return owned_subtensors(process_rank,group_num_processes_,num_subtensors);
 }

 /** Returns whether or not the given subtensor is owned by the current process. **/
 virtual bool isLocalSubtensor(unsigned long long subtensor_id,                  //in: subtensor id (binary mask)
                               unsigned long long num_subtensors) const override //in: total number of subtensors
 {
  return (subtensorOwnerId(subtensor_id,num_subtensors) == current_process_rank_);
 }

 /** Returns whether or not the given subtensor is stored locally on the current process. **/
 virtual bool isLocalSubtensor(const Tensor & subtensor) const override //in: subtensor
 {
  return (local_tensors_.find(subtensor.getName()) != local_tensors_.cend());
 }

 /** Returns the amount of memory per process. **/
 virtual std::size_t getMemoryPerProcess() const override {return memory_per_process_;}

 /** Returns the rank of the current process. **/
 virtual unsigned int getProcessRank() const override {return current_process_rank_;}

 /** Returns the total number of processes in the group. **/
 virtual unsigned int getNumProcesses() const override {return group_num_processes_;}

 /** Returns the MPI intra-communicator associated with the group of processes. **/
 virtual const MPICommProxy & getMPICommProxy() const override {return intra_comm_;}

private:

 unsigned int current_process_rank_; //rank of the current process (in some process group)
 unsigned int group_num_processes_;  //total number of processes (in some process group)
 std::size_t memory_per_process_;    //amount of memory (bytes) per process
 MPICommProxy intra_comm_;           //MPI communicator for the process group
 const std::unordered_map<std::string,std::shared_ptr<Tensor>> & local_tensors_; //locally stored tensors
};

//Numerical server:
class NumServer final {

public:

#ifdef MPI_ENABLED
 NumServer(const MPICommProxy & communicator,                               //MPI communicator proxy
           const ParamConf & parameters,                                    //runtime configuration parameters
           const std::string & graph_executor_name = "lazy-dag-executor",   //DAG executor kind
           const std::string & node_executor_name = "talsh-node-executor"); //DAG node executor kind
#else
 NumServer(const ParamConf & parameters,                                    //runtime configuration parameters
           const std::string & graph_executor_name = "lazy-dag-executor",   //DAG executor kind
           const std::string & node_executor_name = "talsh-node-executor"); //DAG node executor kind
#endif

 NumServer(const NumServer &) = delete;
 NumServer & operator=(const NumServer &) = delete;
 NumServer(NumServer &&) noexcept = delete;
 NumServer & operator=(NumServer &&) noexcept = delete;
 ~NumServer();

 /** Reconfigures tensor runtime implementation. **/
#ifdef MPI_ENABLED
 void reconfigureTensorRuntime(const MPICommProxy & communicator,
                               const ParamConf & parameters,
                               const std::string & dag_executor_name,
                               const std::string & node_executor_name);
#else
 void reconfigureTensorRuntime(const ParamConf & parameters,
                               const std::string & dag_executor_name,
                               const std::string & node_executor_name);
#endif

 /** Resets the tensor contraction sequence optimizer that is
     invoked when evaluating tensor networks. **/
 void resetContrSeqOptimizer(const std::string & optimizer_name, //in: tensor contraction sequence optimizer name
                             bool caching = false);              //in: whether or not the optimized tensor contraction sequence will be cached for later reuse

 /** Activates optimized tensor contraction sequence caching for later reuse. **/
 void activateContrSeqCaching(bool persist = false);

 /** Deactivates optimized tensor contraction sequence caching. **/
 void deactivateContrSeqCaching();

 /** Queries the status of optimized tensor contraction sequence caching. **/
 bool queryContrSeqCaching() const;

 /** Resets the client logging level (0:none). **/
 void resetClientLoggingLevel(int level = 0);

 /** Resets the runtime logging level (0:none). **/
 void resetRuntimeLoggingLevel(int level = 0);

 /** Resets tensor operation execution serialization. **/
 void resetExecutionSerialization(bool serialize,
                                  bool validation_trace = false);

 /** Activates/deactivates dry run (no actual computations). **/
 void activateDryRun(bool dry_run);

 /** Activates mixed-precision fast math operations on all devices (if available). **/
 void activateFastMath();

 /** Returns the Host memory buffer size in bytes provided by the runtime. **/
 std::size_t getMemoryBufferSize() const;

 /** Returns the current value of the Flop counter. **/
 double getTotalFlopCount() const;

 /** Returns the default process group comprising all MPI processes and their communicator. **/
 const ProcessGroup & getDefaultProcessGroup() const;

 /** Returns the current process group comprising solely the current MPI process and its own self-communicator. **/
 const ProcessGroup & getCurrentProcessGroup() const;

 /** Returns the local rank of the MPI process in a given process group, or -1 if it does not belong to it. **/
 int getProcessRank(const ProcessGroup & process_group) const;

 /** Returns the global rank of the current MPI process in the default process group. **/
 int getProcessRank() const;

 /** Returns the number of MPI processes in a given process group. **/
 int getNumProcesses(const ProcessGroup & process_group) const;

 /** Returns the total number of MPI processes in the default process group. **/
 int getNumProcesses() const;

 /** Returns a composite tensor mapper for a given process group. **/
 std::shared_ptr<TensorMapper> getTensorMapper(const ProcessGroup & process_group) const;

 /** Returns a composite tensor mapper for the default process group (all processes). **/
 std::shared_ptr<TensorMapper> getTensorMapper() const;

 /** Registers an external tensor method. **/
 void registerTensorMethod(const std::string & tag,
                           std::shared_ptr<TensorMethod> method);

 /** Retrieves a registered external tensor method. **/
 std::shared_ptr<TensorMethod> getTensorMethod(const std::string & tag);

 /** Registers an external data packet. **/
 void registerExternalData(const std::string & tag,
                           std::shared_ptr<BytePacket> packet);

 /** Retrieves a registered external data packet. **/
 std::shared_ptr<BytePacket> getExternalData(const std::string & tag);

 /** Opens a new (child) TAProL scope and returns its id. **/
 ScopeId openScope(const std::string & scope_name); //new scope name

 /** Closes the currently open TAProL scope and returns its parental scope id. **/
 ScopeId closeScope();


 /** Creates a named vector space, returns its registered id, and,
     optionally, a non-owning pointer to it. **/
 SpaceId createVectorSpace(const std::string & space_name,            //in: vector space name
                           DimExtent space_dim,                       //in: vector space dimension
                           const VectorSpace ** space_ptr = nullptr); //out: non-owning pointer to the created vector space

 /** Destroys a previously created named vector space. **/
 void destroyVectorSpace(const std::string & space_name); //in: name of the vector space to destroy
 void destroyVectorSpace(SpaceId space_id);               //in: id of the vector space to destroy

 /** Returns a non-owning pointer to a previously registered vector space,
     including the anonymous vector space. **/
 const VectorSpace * getVectorSpace(const std::string & space_name) const;

 /** Creates a named subspace of a named vector space,
     returns its registered id, and, optionally, a non-owning pointer to it. **/
 SubspaceId createSubspace(const std::string & subspace_name,         //in: subspace name
                           const std::string & space_name,            //in: containing vector space name
                           std::pair<DimOffset,DimOffset> bounds,     //in: range of basis vectors defining the created subspace
                           const Subspace ** subspace_ptr = nullptr); //out: non-owning pointer to the created subspace

 /** Destroys a previously created named subspace of a named vector space. **/
 void destroySubspace(const std::string & subspace_name); //in: name of the subspace to destroy
 void destroySubspace(SubspaceId subspace_id);            //in: id of the subspace to destroy

 /** Returns a non-owning pointer to a previously registered named subspace
     of a previously registered named vector space. **/
 const Subspace * getSubspace(const std::string & subspace_name) const;
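
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):

      const VectorSpace * space = nullptr;
      auto space_id = srv.createVectorSpace("Orbitals",64,&space);  //64-dimensional vector space
      const Subspace * occupied = nullptr;
      auto subspace_id = srv.createSubspace("Occupied","Orbitals",
                                            {0,31},&occupied);      //first 32 basis vectors
 **/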

 /** Submits an individual (simple or composite) tensor operation for processing.
     Composite tensor operations require an implementation of the TensorMapper interface. **/
 bool submit(std::shared_ptr<TensorOperation> operation,   //in: tensor operation for numerical evaluation
             std::shared_ptr<TensorMapper> tensor_mapper); //in: tensor mapper (for composite tensor operations only)

 /** Submits a tensor network for processing (evaluating the output tensor-result).
     If the output (result) tensor has not been created yet, it will be created and
     initialized to zero automatically, and later destroyed automatically when no longer needed.
     By default all parallel processes will be processing the tensor network,
     otherwise the desired process subset needs to be explicitly specified. **/
 bool submit(TensorNetwork & network);                    //in: tensor network for numerical evaluation
 bool submit(std::shared_ptr<TensorNetwork> network);     //in: tensor network for numerical evaluation
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             TensorNetwork & network);                    //in: tensor network for numerical evaluation
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             std::shared_ptr<TensorNetwork> network);     //in: tensor network for numerical evaluation

 /** Submits a tensor network expansion for processing (evaluating the output tensors of all
     constituent tensor networks and accumulating them into the provided accumulator tensor).
     Synchronization of the tensor expansion evaluation is done via syncing on the accumulator
     tensor. By default all parallel processes will be processing the tensor network expansion,
     otherwise the desired process subset needs to be explicitly specified. **/
 bool submit(TensorExpansion & expansion,                 //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(std::shared_ptr<TensorExpansion> expansion,  //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             TensorExpansion & expansion,                 //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel
 bool submit(const ProcessGroup & process_group,          //in: chosen group of MPI processes
             std::shared_ptr<TensorExpansion> expansion,  //in: tensor expansion for numerical evaluation
             std::shared_ptr<Tensor> accumulator,         //inout: tensor accumulator (result)
             unsigned int parallel_width = 1);            //in: requested number of execution subgroups running in parallel

 /** Synchronizes all update operations on a given tensor.
     Setting wait to FALSE only tests for completion.
     If ProcessGroup is not provided, defaults to the local process. **/
 bool sync(const Tensor & tensor,
           bool wait = true);
 bool sync(const ProcessGroup & process_group,
           const Tensor & tensor,
           bool wait = true);

 /** Synchronizes execution of a specific tensor network.
     Setting wait to FALSE only tests for completion.
     If ProcessGroup is not provided, defaults to the local process. **/
 bool sync(TensorNetwork & network,
           bool wait = true);
 bool sync(const ProcessGroup & process_group,
           TensorNetwork & network,
           bool wait = true);

 /** Synchronizes execution of all outstanding tensor operations.
     Setting wait to FALSE only tests for completion.
     If ProcessGroup is not provided, defaults to the local process. **/
 bool sync(bool wait = true);
 bool sync(const ProcessGroup & process_group,
           bool wait = true);

 /** HIGHER-LEVEL WRAPPERS **/

 /** Synchronizes all outstanding update operations on a given tensor.
     If ProcessGroup is not provided, defaults to the local process. **/
 bool sync(const std::string & name, //in: tensor name
           bool wait = true);        //in: wait versus test for completion
 bool sync(const ProcessGroup & process_group,
           const std::string & name, //in: tensor name
           bool wait = true);        //in: wait versus test for completion

 /** Checks whether a given tensor has been allocated storage (created). **/
 bool tensorAllocated(const std::string & name) const; //in: tensor name

 /** Returns a shared pointer to the requested tensor object. **/
 std::shared_ptr<Tensor> getTensor(const std::string & name); //in: tensor name

 /** Returns a reference to the requested tensor object. **/
 Tensor & getTensorRef(const std::string & name); //in: tensor name

 /** Returns the tensor element type. **/
 TensorElementType getTensorElementType(const std::string & name) const; //in: tensor name

 /** Registers a group of tensor dimensions which form an isometry when
     contracted over with the conjugated tensor (see exatn::numerics::Tensor).
     Returns TRUE on success, FALSE on failure. **/
 bool registerTensorIsometry(const std::string & name,                     //in: tensor name
                             const std::vector<unsigned int> & iso_dims);  //in: tensor dimensions forming the isometry
 bool registerTensorIsometry(const std::string & name,                     //in: tensor name
                             const std::vector<unsigned int> & iso_dims0,  //in: tensor dimensions forming the isometry (group 0)
                             const std::vector<unsigned int> & iso_dims1); //in: tensor dimensions forming the isometry (group 1)

 /** Returns TRUE if the calling process is within the existence domain of all given tensors, FALSE otherwise. **/
 template <typename... Args>
 bool withinTensorExistenceDomain(const std::string & tensor_name, Args&&... tensor_names) const //in: tensor names
 {
  if(!withinTensorExistenceDomain(tensor_name)) return false;
  return withinTensorExistenceDomain(std::forward<Args>(tensor_names)...);
 }

 bool withinTensorExistenceDomain(const std::string & tensor_name) const; //in: tensor name
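
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):
     the variadic overload above reduces over any number of tensor names:

      if(srv.withinTensorExistenceDomain("A","B","C")){
       //the calling process belongs to the existence domain of each of A, B, C
      }
 **/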

 /** Returns the process group associated with the given tensors, that is,
     the intersection of the existence domains of the given tensors. Note that
     the existence domains of the given tensors must be properly nested,
      tensorA <= tensorB <= tensorC <= ... <= tensorZ,
     for some ordering of the tensors, otherwise the code will result in
     undefined behavior. It is the user's responsibility to ensure that
     the returned process group is also a subdomain of full presence
     for all participating tensors. **/
 template <typename... Args>
 const ProcessGroup & getTensorProcessGroup(const std::string & tensor_name, Args&&... tensor_names) const //in: tensor names
 {
  const auto & tensor_domain = getTensorProcessGroup(tensor_name);
  const auto & other_tensors_domain = getTensorProcessGroup(std::forward<Args>(tensor_names)...);
  if(tensor_domain.isContainedIn(other_tensors_domain)){
   return tensor_domain;
  }else if(other_tensors_domain.isContainedIn(tensor_domain)){
   return other_tensors_domain;
  }else{
   std::cout << "#ERROR(exatn::getTensorProcessGroup): Tensor operand existence domains must be properly nested: "
             << "Tensor " << tensor_name << " is not properly nested w.r.t. tensors ";
   print_variadic_pack(std::forward<Args>(tensor_names)...); std::cout << std::endl;
   const auto & tensor_domain_ranks = tensor_domain.getProcessRanks();
   const auto & other_tensors_domain_ranks = other_tensors_domain.getProcessRanks();
   std::cout << tensor_name << ":" << std::endl;
   for(const auto & proc_rank: tensor_domain_ranks) std::cout << " " << proc_rank;
   std::cout << std::endl;
   print_variadic_pack(std::forward<Args>(tensor_names)...); std::cout << ":" << std::endl;
   for(const auto & proc_rank: other_tensors_domain_ranks) std::cout << " " << proc_rank;
   std::cout << std::endl;
   assert(false);
  }
  return getDefaultProcessGroup();
 }

 const ProcessGroup & getTensorProcessGroup(const std::string & tensor_name) const; //in: tensor name

 /** Declares, registers, and actually creates a tensor via the processing backend.
     See numerics::Tensor constructors for different creation options. **/
 template <typename... Args>
 bool createTensor(const std::string & name,       //in: tensor name
                   TensorElementType element_type, //in: tensor element type
                   Args&&... args);                //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const std::string & name,       //in: tensor name
                       TensorElementType element_type, //in: tensor element type
                       Args&&... args);                //in: other arguments for Tensor ctor
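
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):

      bool ok = srv.createTensorSync("T",TensorElementType::REAL64,
                                     TensorShape{16,16,16}); //dense rank-3 tensor of shape 16x16x16
 **/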

 bool createTensor(std::shared_ptr<Tensor> tensor,  //in: existing declared tensor (can be composite)
                   TensorElementType element_type); //in: tensor element type

 bool createTensorSync(std::shared_ptr<Tensor> tensor,  //in: existing declared tensor (can be composite)
                       TensorElementType element_type); //in: tensor element type

 bool createTensor(const std::string & name,          //in: tensor name
                   const TensorSignature & signature, //in: tensor signature with registered spaces/subspaces
                   TensorElementType element_type);   //in: tensor element type

 template <typename... Args>
 bool createTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                   const std::string & name,           //in: tensor name
                   TensorElementType element_type,     //in: tensor element type
                   Args&&... args);                    //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       const std::string & name,           //in: tensor name
                       TensorElementType element_type,     //in: tensor element type
                       Args&&... args);                    //in: other arguments for Tensor ctor

 bool createTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                   std::shared_ptr<Tensor> tensor,     //in: existing declared tensor (can be composite)
                   TensorElementType element_type);    //in: tensor element type

 bool createTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       std::shared_ptr<Tensor> tensor,     //in: existing declared tensor (can be composite)
                       TensorElementType element_type);    //in: tensor element type

 /** Creates and allocates storage for a composite tensor distributed over a given process group.
     The distribution of tensor blocks is dictated by its split dimensions. **/
 template <typename... Args>
 bool createTensor(const ProcessGroup & process_group,                      //in: chosen group of MPI processes
                   const std::string & name,                                //in: tensor name
                   const std::vector<std::pair<unsigned int,
                                               unsigned int>> & split_dims, //in: split tensor dimensions: pair{Dimension,MaxDepth}
                   TensorElementType element_type,                          //in: tensor element type
                   Args&&... args);                                         //in: other arguments for Tensor ctor

 template <typename... Args>
 bool createTensorSync(const ProcessGroup & process_group,                      //in: chosen group of MPI processes
                       const std::string & name,                                //in: tensor name
                       const std::vector<std::pair<unsigned int,
                                                   unsigned int>> & split_dims, //in: split tensor dimensions: pair{Dimension,MaxDepth}
                       TensorElementType element_type,                          //in: tensor element type
                       Args&&... args);                                         //in: other arguments for Tensor ctor
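
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv";
     the subtensor count below presumes that MaxDepth counts recursive bisections):
     splitting dimension 0 to depth 2 and dimension 1 to depth 1 would produce
     2^2 * 2^1 = 8 subtensors distributed over the given process group:

      bool ok = srv.createTensorSync(srv.getDefaultProcessGroup(),"W",
                                     {{0,2},{1,1}}, //split dims: pair{Dimension,MaxDepth}
                                     TensorElementType::REAL32,
                                     TensorShape{64,64,64});
 **/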

 /** Creates all input tensors in a given tensor network that are still unallocated. **/
 bool createTensors(TensorNetwork & tensor_network,         //inout: tensor network
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(TensorNetwork & tensor_network,     //inout: tensor network
                        TensorElementType element_type);    //in: tensor element type

 bool createTensors(const ProcessGroup & process_group,     //in: chosen group of MPI processes
                    TensorNetwork & tensor_network,         //inout: tensor network
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        TensorNetwork & tensor_network,     //inout: tensor network
                        TensorElementType element_type);    //in: tensor element type

 /** Creates all input tensors in a given tensor network expansion that are still unallocated. **/
 bool createTensors(TensorExpansion & tensor_expansion,     //inout: tensor expansion
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(TensorExpansion & tensor_expansion, //inout: tensor expansion
                        TensorElementType element_type);    //in: tensor element type

 bool createTensors(const ProcessGroup & process_group,     //in: chosen group of MPI processes
                    TensorExpansion & tensor_expansion,     //inout: tensor expansion
                    TensorElementType element_type);        //in: tensor element type

 bool createTensorsSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        TensorExpansion & tensor_expansion, //inout: tensor expansion
                        TensorElementType element_type);    //in: tensor element type

 /** Destroys a tensor, including its backend representation. **/
 bool destroyTensor(const std::string & name); //in: tensor name

 bool destroyTensorSync(const std::string & name); //in: tensor name

 /** Destroys all currently allocated tensors in a given tensor network.
     Note that the destroyed tensors could also be present in other tensor networks. **/
 bool destroyTensors(TensorNetwork & tensor_network);     //inout: tensor network

 bool destroyTensorsSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Initializes a tensor to some scalar value. **/
 template<typename NumericType>
 bool initTensor(const std::string & name, //in: tensor name
                 NumericType value);       //in: scalar value

 template<typename NumericType>
 bool initTensorSync(const std::string & name, //in: tensor name
                     NumericType value);       //in: scalar value

 /** Initializes a tensor with externally provided data.
     The externally provided data vector is assumed to store
     the tensor elements in column-major order. **/
 template<typename NumericType>
 bool initTensorData(const std::string & name,                   //in: tensor name
                     const std::vector<NumericType> & ext_data); //in: vector with externally provided data

 template<typename NumericType>
 bool initTensorDataSync(const std::string & name,                   //in: tensor name
                         const std::vector<NumericType> & ext_data); //in: vector with externally provided data
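
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):
     initializes a 2x2 tensor "M" with external data in column-major order:

      std::vector<double> data{1.0, 2.0,  //column 0
                               3.0, 4.0}; //column 1
      bool ok = srv.initTensorDataSync("M",data);
 **/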

 /** Initializes a tensor with externally provided data read from a file with format:
      Storage format (string: {dense|list})
      Tensor name
      Tensor shape (space-separated dimension extents)
      Tensor signature (space-separated dimension base offsets)
      Tensor elements:
       Dense format: Numeric values (column-major order), any number of values per line
       List format: Numeric value and Multi-index in each line **/
 bool initTensorFile(const std::string & name,      //in: tensor name
                     const std::string & filename); //in: file name with tensor data

 bool initTensorFileSync(const std::string & name,      //in: tensor name
                         const std::string & filename); //in: file name with tensor data
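
 /** Example (illustrative only): a dense-format file matching the description
     above, for a 2x2 tensor "M" with zero base offsets:

      dense
      M
      2 2
      0 0
      1.0 2.0 3.0 4.0
 **/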

 /** Initializes a tensor with random values. **/
 bool initTensorRnd(const std::string & name);     //in: tensor name

 bool initTensorRndSync(const std::string & name); //in: tensor name

 /** Initializes all input tensors of a given tensor network with random values. **/
 bool initTensorsRnd(TensorNetwork & tensor_network);     //inout: tensor network

 bool initTensorsRndSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Initializes all input tensors in a given tensor network expansion with random values. **/
 bool initTensorsRnd(TensorExpansion & tensor_expansion);     //inout: tensor network expansion

 bool initTensorsRndSync(TensorExpansion & tensor_expansion); //inout: tensor network expansion

 /** Initializes special tensors present in the tensor network. **/
 bool initTensorsSpecial(TensorNetwork & tensor_network);     //inout: tensor network

 bool initTensorsSpecialSync(TensorNetwork & tensor_network); //inout: tensor network

 /** Computes the max-abs norm of a tensor. **/
 bool computeMaxAbsSync(const std::string & name, //in: tensor name
                        double & norm);           //out: tensor norm

 /** Computes the 1-norm of a tensor. **/
 bool computeNorm1Sync(const std::string & name, //in: tensor name
                       double & norm);           //out: tensor norm

 /** Computes the 2-norm of a tensor. **/
 bool computeNorm2Sync(const std::string & name, //in: tensor name
                       double & norm);           //out: tensor norm

 /** Computes partial 2-norms over a chosen tensor dimension. **/
 bool computePartialNormsSync(const std::string & name,             //in: tensor name
                              unsigned int tensor_dimension,        //in: chosen tensor dimension
                              std::vector<double> & partial_norms); //out: partial 2-norms over the chosen tensor dimension

 /** Computes 2-norms of all tensors in a tensor network. **/
 bool computeNorms2Sync(const TensorNetwork & network,         //in: tensor network
                        std::map<std::string,double> & norms); //out: tensor norms: tensor_name --> norm

 /** Replicates a tensor within the given process group, which defaults to all MPI processes.
     Only the root_process_rank within the given process group is required to have the tensor,
     that is, the tensor will automatically be created in those MPI processes which do not have it.  **/
 bool replicateTensor(const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool replicateTensorSync(const std::string & name,       //in: tensor name
                          int root_process_rank);         //in: local rank of the root process within the given process group

 bool replicateTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool replicateTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name,           //in: tensor name
                          int root_process_rank);             //in: local rank of the root process within the given process group

 /** Shrinks the existence domain of a given tensor to a single process. **/
 bool dereplicateTensor(const std::string & name,     //in: tensor name
                        int root_process_rank);       //in: local rank of the chosen process

 bool dereplicateTensorSync(const std::string & name, //in: tensor name
                            int root_process_rank);   //in: local rank of the chosen process

 bool dereplicateTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                        const std::string & name,           //in: tensor name
                        int root_process_rank);             //in: local rank of the chosen process

 bool dereplicateTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                            const std::string & name,           //in: tensor name
                            int root_process_rank);             //in: local rank of the chosen process

 /** Broadcasts a tensor among all MPI processes within a given process group,
     which defaults to all MPI processes. This function is needed when
     a tensor is updated in an operation submitted to a subset of MPI processes
     such that the excluded MPI processes can receive an updated version of the tensor.
     Note that the tensor must exist in all participating MPI processes. **/
 bool broadcastTensor(const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool broadcastTensorSync(const std::string & name,       //in: tensor name
                          int root_process_rank);         //in: local rank of the root process within the given process group

 bool broadcastTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name,           //in: tensor name
                      int root_process_rank);             //in: local rank of the root process within the given process group

 bool broadcastTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name,           //in: tensor name
                          int root_process_rank);             //in: local rank of the root process within the given process group

 /** Performs a global sum reduction on a tensor among all MPI processes within a given
     process group, which defaults to all MPI processes. This function is needed when
     multiple MPI processes compute their local updates to the tensor, thus requiring
     a global sum reduction such that each MPI process will get the final (same) tensor
     value. Note that the tensor must exist in all participating MPI processes. **/
 bool allreduceTensor(const std::string & name);          //in: tensor name

 bool allreduceTensorSync(const std::string & name);      //in: tensor name

 bool allreduceTensor(const ProcessGroup & process_group, //in: chosen group of MPI processes
                      const std::string & name);          //in: tensor name

 bool allreduceTensorSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                          const std::string & name);          //in: tensor name

 /** Scales a tensor by a scalar value. **/
 template<typename NumericType>
 bool scaleTensor(const std::string & name, //in: tensor name
                  NumericType value);       //in: scalar value

 template<typename NumericType>
 bool scaleTensorSync(const std::string & name, //in: tensor name
                      NumericType value);       //in: scalar value

 /** Transforms (updates) a tensor according to a user-defined tensor functor. **/
 bool transformTensor(const std::string & name,               //in: tensor name
                      std::shared_ptr<TensorMethod> functor); //in: functor defining the tensor transformation

 bool transformTensorSync(const std::string & name,               //in: tensor name
                          std::shared_ptr<TensorMethod> functor); //in: functor defining the tensor transformation

 bool transformTensor(const std::string & name,              //in: tensor name
                      const std::string & functor_name);     //in: name of the functor defining the tensor transformation

 bool transformTensorSync(const std::string & name,          //in: tensor name
                          const std::string & functor_name); //in: name of the functor defining the tensor transformation
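
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"
     and that the predefined FunctorScale imported above is constructible from
     a scalar prefactor): scales tensor "T" by 0.5 via a tensor functor:

      bool ok = srv.transformTensorSync("T",
                 std::shared_ptr<TensorMethod>(new FunctorScale(0.5)));
 **/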

 /** Extracts a slice from a tensor and stores it in another tensor, whose
     signature and shape determine which slice to extract. **/
 bool extractTensorSlice(const std::string & tensor_name, //in: tensor name
                         const std::string & slice_name); //in: slice name

 bool extractTensorSliceSync(const std::string & tensor_name, //in: tensor name
                             const std::string & slice_name); //in: slice name

 /** Inserts a slice into a tensor. The signature and shape of the slice
     determine the position in the tensor where the slice will be inserted. **/
 bool insertTensorSlice(const std::string & tensor_name, //in: tensor name
                        const std::string & slice_name); //in: slice name

 bool insertTensorSliceSync(const std::string & tensor_name, //in: tensor name
                            const std::string & slice_name); //in: slice name

 /** Assigns one tensor to another congruent one (makes a copy of a tensor).
     If the output tensor with the given name does not exist, it will be created.
     Note that the output tensor must either exist or not exist across all
     participating processes, otherwise the behavior is undefined! **/
 bool copyTensor(const std::string & output_name, //in: output tensor name
                 const std::string & input_name); //in: input tensor name

 bool copyTensorSync(const std::string & output_name, //in: output tensor name
                     const std::string & input_name); //in: input tensor name

 /** Performs tensor addition: tensor0 += tensor1 * alpha **/
 template<typename NumericType>
 bool addTensors(const std::string & addition, //in: symbolic tensor addition specification
                 NumericType alpha);           //in: alpha prefactor

 template<typename NumericType>
 bool addTensorsSync(const std::string & addition, //in: symbolic tensor addition specification
                     NumericType alpha);           //in: alpha prefactor
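
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):
     accumulates tensor B into tensor A with prefactor 0.5:

      bool ok = srv.addTensorsSync("A(i,j)+=B(i,j)",0.5);
 **/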

 /** Performs tensor contraction: tensor0 += tensor1 * tensor2 * alpha **/
 template<typename NumericType>
 bool contractTensors(const std::string & contraction, //in: symbolic tensor contraction specification
                      NumericType alpha);              //in: alpha prefactor

 template<typename NumericType>
 bool contractTensorsSync(const std::string & contraction, //in: symbolic tensor contraction specification
                          NumericType alpha);              //in: alpha prefactor
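
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"):
     contracts tensors B and C over index k into tensor A with prefactor 1.0:

      bool ok = srv.contractTensorsSync("A(i,j)+=B(i,k)*C(k,j)",1.0);
 **/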

 /** Decomposes a tensor into three tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * S(i,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor,
      R(b,j,a,i,d) is the right SVD factor,
      S(i,j) is the middle SVD factor (the diagonal with singular values).
     Note that the ordering of the contracted indices is not guaranteed! **/
 bool decomposeTensorSVD(const std::string & contraction); //in: three-factor symbolic tensor contraction specification

 bool decomposeTensorSVDSync(const std::string & contraction); //in: three-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor with absorbed singular values,
      R(b,j,a,i,d) is the right SVD factor. **/
 bool decomposeTensorSVDL(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDLSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor,
      R(b,j,a,i,d) is the right SVD factor with absorbed singular values. **/
 bool decomposeTensorSVDR(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDRSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Decomposes a tensor into two tensor factors via SVD. The symbolic
     tensor contraction specification specifies the decomposition,
     for example:
      D(a,b,c,d,e) = L(c,i,e,j) * R(b,j,a,i,d)
     where
      L(c,i,e,j) is the left SVD factor with absorbed square root of singular values,
      R(b,j,a,i,d) is the right SVD factor with absorbed square root of singular values. **/
 bool decomposeTensorSVDLR(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool decomposeTensorSVDLRSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Orthogonalizes a tensor by decomposing it via SVD while discarding
     the middle tensor factor with singular values. The symbolic tensor contraction
     specification specifies the decomposition. It must contain exactly one contracted index! **/
 bool orthogonalizeTensorSVD(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 bool orthogonalizeTensorSVDSync(const std::string & contraction); //in: two-factor symbolic tensor contraction specification

 /** Orthogonalizes an isometric tensor over its isometric dimensions via the modified Gram-Schmidt procedure.  **/
 bool orthogonalizeTensorMGS(const std::string & name); //in: tensor name

 bool orthogonalizeTensorMGSSync(const std::string & name); //in: tensor name

 /** Prints a tensor to the standard output. **/
 bool printTensor(const std::string & name); //in: tensor name

 bool printTensorSync(const std::string & name); //in: tensor name

 /** Prints a tensor to a file. **/
 bool printTensorFile(const std::string & name,      //in: tensor name
                      const std::string & filename); //in: file name

 bool printTensorFileSync(const std::string & name,      //in: tensor name
                          const std::string & filename); //in: file name

 /** Performs a full evaluation of a tensor network based on the symbolic
     specification involving already created tensors (including the output). **/
 bool evaluateTensorNetwork(const std::string & name,           //in: tensor network name
                            const std::string & network);       //in: symbolic tensor network specification
 bool evaluateTensorNetwork(const ProcessGroup & process_group, //in: chosen group of MPI processes
                            const std::string & name,           //in: tensor network name
                            const std::string & network);       //in: symbolic tensor network specification

 bool evaluateTensorNetworkSync(const std::string & name,           //in: tensor network name
                                const std::string & network);       //in: symbolic tensor network specification
 bool evaluateTensorNetworkSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                const std::string & name,           //in: tensor network name
                                const std::string & network);       //in: symbolic tensor network specification
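
 /** Usage sketch (illustrative only, assuming a live NumServer instance "srv"
     and already created tensors D0, T0, T1):

      bool ok = srv.evaluateTensorNetworkSync("MyNetwork",
                                              "D0(a,b)+=T0(a,i)*T1(i,b)");
 **/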

 /** Normalizes a tensor to a given 2-norm. **/
 bool normalizeNorm2Sync(const std::string & name,          //in: tensor name
                         double norm = 1.0,                 //in: desired 2-norm
                         double * original_norm = nullptr); //out: original 2-norm

 /** Normalizes a tensor network expansion to a given 2-norm by rescaling
     all tensor network components by the same factor: Only the tensor
     network expansion coefficients are affected. **/
 bool normalizeNorm2Sync(TensorExpansion & expansion,       //inout: tensor network expansion
                         double norm = 1.0,                 //in: desired 2-norm
                         double * original_norm = nullptr); //out: original 2-norm

 bool normalizeNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                         TensorExpansion & expansion,        //inout: tensor network expansion
                         double norm = 1.0,                  //in: desired 2-norm
                         double * original_norm = nullptr);  //out: original 2-norm

 /** Normalizes all input tensors in a tensor network to a given 2-norm.
     If only_optimizable is TRUE, only optimizable tensors will be normalized. **/
 bool balanceNorm2Sync(TensorNetwork & network,        //inout: tensor network
                       double norm,                    //in: desired 2-norm
                       bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       TensorNetwork & network,            //inout: tensor network
                       double norm,                        //in: desired 2-norm
                       bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Normalizes all input tensors in a tensor network expansion to a given 2-norm.
     If only_optimizable is TRUE, only optimizable tensors will be normalized. **/
 bool balanceNorm2Sync(TensorExpansion & expansion,    //inout: tensor network expansion
                       double norm,                    //in: desired 2-norm
                       bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                       TensorExpansion & expansion,        //inout: tensor network expansion
                       double norm,                        //in: desired 2-norm
                       bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Normalizes all input tensors in a tensor network expansion to a given 2-norm
     and rescales tensor network expansion coefficients to normalize the entire
     tensor network expansion to another given 2-norm. If only_optimizable is TRUE,
     only optimizable tensors will be normalized. **/
 bool balanceNormalizeNorm2Sync(TensorExpansion & expansion,    //inout: tensor network expansion
                                double tensor_norm = 1.0,       //in: desired 2-norm of each input tensor
                                double expansion_norm = 1.0,    //in: desired 2-norm of the tensor network expansion
                                bool only_optimizable = false); //in: whether to normalize only optimizable tensors

 bool balanceNormalizeNorm2Sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                TensorExpansion & expansion,        //inout: tensor network expansion
                                double tensor_norm = 1.0,           //in: desired 2-norm of each input tensor
                                double expansion_norm = 1.0,        //in: desired 2-norm of the tensor network expansion
                                bool only_optimizable = false);     //in: whether to normalize only optimizable tensors

 /** Duplicates a given tensor network as a new tensor network copy.
     The name of the tensor network copy will be prepended with an underscore,
     whereas all copies of the input tensors will be renamed with their unique (local) hashes. **/
 std::shared_ptr<TensorNetwork> duplicateSync(const TensorNetwork & network); //in: tensor network

 std::shared_ptr<TensorNetwork> duplicateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                              const TensorNetwork & network);     //in: tensor network

 /** Duplicates a given tensor network expansion as a new tensor network expansion copy.
     The name of the tensor network expansion copy will be prepended with an underscore,
     whereas all copies of the input tensors will be renamed with their unique (local) hashes. **/
 std::shared_ptr<TensorExpansion> duplicateSync(const TensorExpansion & expansion); //in: tensor expansion

 std::shared_ptr<TensorExpansion> duplicateSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                                const TensorExpansion & expansion); //in: tensor expansion

 /** Projects a given tensor network to a chosen slice of its full output tensor. **/
 std::shared_ptr<TensorNetwork> projectSliceSync(const TensorNetwork & network, //in: tensor network
                                                 const Tensor & slice);         //in: desired slice of the output tensor

 std::shared_ptr<TensorNetwork> projectSliceSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                                 const TensorNetwork & network,      //in: tensor network
                                                 const Tensor & slice);              //in: desired slice of the output tensor

 /** Projects a given tensor network expansion to a chosen slice of its full output tensor. **/
 std::shared_ptr<TensorExpansion> projectSliceSync(const TensorExpansion & expansion, //in: tensor network expansion
                                                   const Tensor & slice);             //in: desired slice of the output tensor

 std::shared_ptr<TensorExpansion> projectSliceSync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                                                   const TensorExpansion & expansion,  //in: tensor network expansion
                                                   const Tensor & slice);              //in: desired slice of the output tensor