NumServerTester.cpp 168 KB
Newer Older
1
2
3
#include <gtest/gtest.h>

#include "exatn.hpp"
4
#include "quantum.hpp"
5
#include "talshxx.hpp"
6

7
8
9
10
#ifdef MPI_ENABLED
#include "mpi.h"
#endif

11
#include <iostream>
12
#include <ios>
13
#include <utility>
14
#include <numeric>
15
16
#include <chrono>
#include <thread>
17

18
19
#include "errors.hpp"

20
//Test activation:
21

22
//Each EXATN_TESTn macro enables the matching `#ifdef EXATN_TESTn` test below.
//Tests 2..21 are currently switched off as a group by the enclosing block comment;
//tests 15 and 17 are additionally disabled on their own lines for the stated reasons.
#define EXATN_TEST0
#define EXATN_TEST1
/*#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5
#define EXATN_TEST6
#define EXATN_TEST7
#define EXATN_TEST8
#define EXATN_TEST9
#define EXATN_TEST10
#define EXATN_TEST11
#define EXATN_TEST12
#define EXATN_TEST13
#define EXATN_TEST14
//#define EXATN_TEST15 //buggy (parsed named spaces/subspaces)
#define EXATN_TEST16
//#define EXATN_TEST17 //MKL only (tensor hyper-contraction)
#define EXATN_TEST18
#define EXATN_TEST19
#define EXATN_TEST20
#define EXATN_TEST21*/
#define EXATN_TEST22
#define EXATN_TEST23
#define EXATN_TEST24
#define EXATN_TEST25
#define EXATN_TEST26
#define EXATN_TEST27 //requires input file from source
#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29
#define EXATN_TEST30
#define EXATN_TEST31 //requires input file from source
#define EXATN_TEST32
#define EXATN_TEST33
56

57

58
#ifdef EXATN_TEST0
59
60
61
62
63
64
65
66
67
68
//Performance smoke test: times several batches of tensor contractions on square
//rank-2 tensors (and one rank-3 case), prints the achieved GFlop/s on rank 0, and
//finishes with an SVD-based decomposition round trip whose residual 1-norm is printed.
TEST(NumServerTester, PerformanceExaTN)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorSignature;
 using exatn::TensorNetwork;
 using exatn::TensorOperator;
 using exatn::TensorExpansion;
 using exatn::TensorElementType;

 const exatn::DimExtent DIM = 1024; //CPU: 1024 for low-end CPU, 2048 for high-end CPU
                                    //3072 for Maxwell, 4096 for Pascal and Volta
 const auto TENS_ELEM_TYPE = TensorElementType::REAL32;

 //NOTE(review): logging is raised here but the matching reset at the end of this
 //test is commented out, so the level presumably stays raised for later tests -- confirm intent.
 exatn::resetLoggingLevel(1,2); //debug

 //exatn::resetExecutionSerialization(true,true); //debug
 //exatn::activateFastMath(); //fast math (mixed-precision)

 bool ok = true;
 const bool is_root = (exatn::getProcessRank() == 0);

 //Short pause inserted between the individual stages:
 const auto pause = [](){
  std::this_thread::sleep_for(std::chrono::microseconds(100000));
 };

 //Prints the achieved rate for a given flop count and elapsed time (rank 0 only):
 const auto report_gflops = [is_root](double flop_count, auto seconds){
  if(is_root) std::cout << "Average performance (GFlop/s) = " << flop_count/seconds/1e9 << std::endl;
 };

 //The five contraction patterns executed in cases 0 and 1:
 const char * const gemm_patterns[] = {
  "C(i,j)+=A(k,i)*B(k,j)",
  "C(i,j)+=A(i,k)*B(k,j)",
  "C(i,j)+=A(k,i)*B(j,k)",
  "C(i,j)+=A(i,k)*B(j,k)",
  "C(i,j)+=A(k,i)*B(k,j)"
 };

 if(is_root) std::cout << "Contractions of rank-2 tensors:" << std::endl;

 //Create tensors A..I, all DIM x DIM:
 if(is_root) std::cout << " Creating all tensors ... ";
 for(const char * name: {"A","B","C","D","E","F","G","H","I"}){
  ok = exatn::createTensor(name,TENS_ELEM_TYPE,TensorShape{DIM,DIM}); assert(ok);
 }
 if(is_root) std::cout << "Done\n";

 //Initialize tensors (inputs to small constants, outputs C/F/I to zero):
 if(is_root) std::cout << " Initializing all tensors ... ";
 {
  const std::pair<const char *, double> init_values[] = {
   {"A",1e-4}, {"B",1e-3}, {"C",0.0},
   {"D",1e-4}, {"E",1e-3}, {"F",0.0},
   {"G",1e-4}, {"H",1e-3}, {"I",0.0}
  };
  for(const auto & entry: init_values){
   ok = exatn::initTensor(entry.first,entry.second); assert(ok);
  }
 }
 if(is_root) std::cout << "Done\n";

 pause();

 //Contract tensors (case 0):
 if(is_root) std::cout << " Case 0: C=A*B five times: Warm-up: ";
 ok = exatn::sync(); assert(ok);
 auto t_begin = exatn::Timer::timeInSecHR();
 for(const char * pattern: gemm_patterns){
  ok = exatn::contractTensors(pattern,1.0); assert(ok);
 }
 ok = exatn::sync(); assert(ok);
 auto elapsed = exatn::Timer::timeInSecHR(t_begin);
 report_gflops(5.0*2.0*double{DIM}*double{DIM}*double{DIM},elapsed);

 pause();

 //Contract tensors (case 1):
 if(is_root) std::cout << " Case 1: C=A*B five times: Reuse: ";
 ok = exatn::sync(); assert(ok);
 t_begin = exatn::Timer::timeInSecHR();
 for(const char * pattern: gemm_patterns){
  ok = exatn::contractTensors(pattern,1.0); assert(ok);
 }
 ok = exatn::sync(); assert(ok);
 elapsed = exatn::Timer::timeInSecHR(t_begin);
 report_gflops(5.0*2.0*double{DIM}*double{DIM}*double{DIM},elapsed);

 pause();

 //Contract tensors (case 2): three contractions with distinct outputs
 if(is_root) std::cout << " Case 2: C=A*B | F=D*E | I=G*H: Pipeline: ";
 ok = exatn::sync(); assert(ok);
 t_begin = exatn::Timer::timeInSecHR();
 for(const char * pattern: {"I(i,j)+=G(j,k)*H(i,k)",
                            "F(i,j)+=D(j,k)*E(i,k)",
                            "C(i,j)+=A(j,k)*B(i,k)"}){
  ok = exatn::contractTensors(pattern,1.0); assert(ok);
 }
 ok = exatn::sync(); assert(ok);
 elapsed = exatn::Timer::timeInSecHR(t_begin);
 report_gflops(3.0*2.0*double{DIM}*double{DIM}*double{DIM},elapsed);

 pause();

 //Contract tensors (case 3): three contractions accumulating into the same output
 if(is_root) std::cout << " Case 3: I=A*B | I=D*E | I=G*H: Prefetch: ";
 ok = exatn::sync(); assert(ok);
 t_begin = exatn::Timer::timeInSecHR();
 for(const char * pattern: {"I(i,j)+=G(j,k)*H(i,k)",
                            "I(i,j)+=D(j,k)*E(i,k)",
                            "I(i,j)+=A(j,k)*B(i,k)"}){
  ok = exatn::contractTensors(pattern,1.0); assert(ok);
 }
 ok = exatn::sync(); assert(ok);
 elapsed = exatn::Timer::timeInSecHR(t_begin);
 report_gflops(3.0*2.0*double{DIM}*double{DIM}*double{DIM},elapsed);

 pause();

 //Destroy tensors (reverse order of creation):
 if(is_root) std::cout << " Destroying all tensors ... ";
 for(const char * name: {"I","H","G","F","E","D","C","B","A"}){
  ok = exatn::destroyTensor(name); assert(ok);
 }
 if(is_root) std::cout << "Done\n";

 ok = exatn::sync(); assert(ok);

 //Create tensors (rank-3 inputs, rank-2 output):
 if(is_root) std::cout << " Creating all tensors ... ";
 for(const char * name: {"A","B"}){
  ok = exatn::createTensor(name,TENS_ELEM_TYPE,TensorShape{DIM,DIM,32ULL}); assert(ok);
 }
 ok = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{DIM,DIM}); assert(ok);
 if(is_root) std::cout << "Done\n";

 //Initialize tensors:
 if(is_root) std::cout << " Initializing all tensors ... ";
 ok = exatn::initTensor("A",1e-4); assert(ok);
 ok = exatn::initTensor("B",1e-3); assert(ok);
 ok = exatn::initTensor("C",0.0); assert(ok);
 if(is_root) std::cout << "Done\n";

 //Contract tensors:
 if(is_root) std::cout << " Case 4: C=A*B: Out-of-core large dims: ";
 ok = exatn::sync(); assert(ok);
 t_begin = exatn::Timer::timeInSecHR();
 ok = exatn::contractTensors("C(i,j)+=A(j,k,l)*B(i,k,l)",1.0); assert(ok);
 ok = exatn::sync(); assert(ok);
 elapsed = exatn::Timer::timeInSecHR(t_begin);
 report_gflops(2.0*double{DIM}*double{DIM}*double{DIM}*double{32},elapsed);

 pause();

 //Destroy tensors:
 if(is_root) std::cout << " Destroying all tensors ... ";
 for(const char * name: {"C","B","A"}){
  ok = exatn::destroyTensor(name); assert(ok);
 }
 if(is_root) std::cout << "Done\n";

 ok = exatn::sync(); assert(ok);

/* REQUIRES at least 48 GB Host RAM:
 //Create tensors:
 success = exatn::createTensor("A",TensorElementType::COMPLEX64,TensorShape{2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2}); assert(success);
 success = exatn::createTensor("B",TensorElementType::COMPLEX64,TensorShape{2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,2,2,2,1,2,2,1,1,2,1,2,2,2,2,2,2,1,2,1,2,1,2,2,2,1}); assert(success);
 success = exatn::createTensor("C",TensorElementType::COMPLEX64,TensorShape{2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,2,1,2,2,1,1,1,1,2,1,1,2,1,2,2,2}); assert(success);

 //Initialize tensors:
 success = exatn::initTensor("A",1e-4); assert(success);
 success = exatn::initTensor("B",1e-3); assert(success);
 success = exatn::initTensor("C",0.0); assert(success);

 //Contract tensors:
 if(root) std::cout << " Case 5: C=A*B out-of-core small dims: ";
 success = exatn::sync(); assert(success);
 time_start = exatn::Timer::timeInSecHR();
 success = exatn::contractTensors("C(c49,c40,c13,c50,c47,c14,c15,c41,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c45,c30,c44,c43,c31,c32,c33,c34,c48,c35,c36,c42,c37,c39,c38,c46)+="
  "A(c49,c40,c13,c50,c62,c47,c14,c15,c63,c41,c64,c65,c16,c66,c67,c68,c69,c17,c18,c19,c70,c71,c72,c73,c74,c75)*"
  "B(c20,c21,c64,c22,c69,c67,c23,c24,c25,c26,c27,c28,c29,c45,c30,c44,c68,c43,c31,c73,c72,c32,c33,c66,c34,c75,c74,c71,c65,c48,c70,c35,c63,c36,c42,c37,c39,c38,c46,c62)",1.0); assert(success);
 success = exatn::sync(); assert(success);
 duration = exatn::Timer::timeInSecHR(time_start);
 if(root) std::cout << "Average performance (GFlop/s) = " << 8.0*1.099512e3/duration << std::endl;

 //Destroy tensors:
 success = exatn::destroyTensor("C"); assert(success);
 success = exatn::destroyTensor("B"); assert(success);
 success = exatn::destroyTensor("A"); assert(success);

 success = exatn::sync(); assert(success); */

 if(is_root) std::cout << "Tensor decomposition:" << std::endl;

 //Create tensors:
 ok = exatn::createTensor("D",TENS_ELEM_TYPE,TensorShape{32,32,32,1}); assert(ok);
 ok = exatn::createTensor("L",TENS_ELEM_TYPE,TensorShape{32,32,32}); assert(ok);
 ok = exatn::createTensor("R",TENS_ELEM_TYPE,TensorShape{32,32,1}); assert(ok);

 //Initialize tensors (D random, factors zero):
 ok = exatn::initTensorRnd("D"); assert(ok);
 for(const char * name: {"L","R"}){
  ok = exatn::initTensor(name,0.0); assert(ok);
 }

 //Normalize tensor D by its 1-norm:
 double d_norm1 = 0.0;
 ok = exatn::computeNorm1Sync("D",d_norm1); assert(ok);
 ok = exatn::scaleTensor("D",1.0/d_norm1); assert(ok);

 //Decompose tensor D into factors L and R:
 ok = exatn::decomposeTensorSVDLRSync("D(u0,u1,u2,u3)=L(u0,c0,u1)*R(u2,c0,u3)"); assert(ok);

 //Subtract the re-contracted factors from D, so what is left in D is the residual:
 ok = exatn::contractTensors("D(u0,u1,u2,u3)+=L(u0,c0,u1)*R(u2,c0,u3)",-1.0); assert(ok);
 ok = exatn::computeNorm1Sync("D",d_norm1); assert(ok);
 if(is_root) std::cout << " Final 1-norm of tensor D (should be close to zero) = " << d_norm1 << std::endl;

 //Destroy tensors:
 for(const char * name: {"R","L","D"}){
  ok = exatn::destroyTensor(name); assert(ok);
 }

 //Synchronize ExaTN server:
 ok = exatn::sync(); assert(ok);
 //exatn::resetLoggingLevel(0,0);
}
#endif

#ifdef EXATN_TEST1
281
//Exemplar workflow: builds process subgroups (multi-process runs only), creates and
//initializes several tensors, performs scaled binary contractions accumulating into a
//scalar ENERGY, then prints norms, the scalar value, and one tensor element read
//through local tensor copies before destroying everything.
TEST(NumServerTester, ExamplarExaTN)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorSignature;
 using exatn::TensorNetwork;
 using exatn::TensorOperator;
 using exatn::TensorExpansion;
 using exatn::TensorElementType;

 const exatn::DimExtent OC_RANGE = 30;
 const exatn::DimExtent VI_RANGE = 60;
 const auto TENS_ELEM_TYPE = TensorElementType::REAL32;

 //exatn::resetLoggingLevel(1,2); //debug

 bool success = true;

 //Create different process groups:
 const auto global_rank = exatn::getProcessRank();
 const auto total_processes = exatn::getNumProcesses();
 const auto & all_processes = exatn::getDefaultProcessGroup();
 const auto & current_process = exatn::getCurrentProcessGroup();
 std::shared_ptr<exatn::ProcessGroup> me_plus_next, me_plus_prev;
 if(total_processes > 1){
  //Assign to the outer handles: re-declaring them here with `auto` would shadow
  //the variables above and leave them null after this scope.
  //NOTE(review): color -1 presumably excludes the process from any subgroup -- confirm against ProcessGroup::split.
  int color = global_rank / 2; if(global_rank == (total_processes - 1)) color = -1;
  me_plus_next = all_processes.split(color);
  color = (global_rank + 1) / 2; if(global_rank == 0) color = -1;
  me_plus_prev = all_processes.split(color);
 }
 success = exatn::sync(); assert(success);

 //Declare and then create (allocate) a tensor (in two steps):
 auto z2 = exatn::makeSharedTensor("Z2",TensorShape{VI_RANGE,VI_RANGE,OC_RANGE,OC_RANGE}); //declares tensor Z2 with no storage
 success = exatn::createTensor(z2,TENS_ELEM_TYPE); assert(success); //allocates storage of type TENS_ELEM_TYPE for tensor Z2

 //Create tensors in one step (with allocated storage):
 for(const char * name: {"Y2","T2","S2"}){
  success = exatn::createTensor(name,TENS_ELEM_TYPE,TensorShape{VI_RANGE,VI_RANGE,OC_RANGE,OC_RANGE}); assert(success);
 }
 for(const char * name: {"H2","W2"}){
  success = exatn::createTensor(name,TENS_ELEM_TYPE,TensorShape{VI_RANGE,VI_RANGE,VI_RANGE,VI_RANGE}); assert(success);
 }
 success = exatn::createTensor("ENERGY",TENS_ELEM_TYPE); assert(success); //just a scalar

 //Initialize tensors to a scalar value:
 success = exatn::initTensor("Z2",0.0); assert(success);
 success = exatn::initTensor("Y2",0.0); assert(success);
 success = exatn::initTensor("T2",1e-4); assert(success);
 success = exatn::initTensor("S2",2e-4); assert(success);
 success = exatn::initTensor("H2",1e-3); assert(success);
 success = exatn::initTensor("W2",2e-3); assert(success);
 success = exatn::initTensor("ENERGY",0.0); assert(success);

 //Perform binary tensor contractions (scaled by a scalar):
 success = exatn::contractTensors("Z2(a,b,i,j)+=T2(d,c,j,i)*H2(c,b,d,a)",0.5); assert(success);
 success = exatn::contractTensors("Y2(a,b,i,j)+=S2(c,d,j,i)*W2(b,d,a,c)",1.0); assert(success);
 success = exatn::contractTensors("ENERGY()+=Z2(a,b,i,j)*Z2(a,b,i,j)",0.25); assert(success);
 success = exatn::contractTensors("ENERGY()+=Y2(a,b,i,j)*Y2(a,b,i,j)",0.25); assert(success);

 //Synchronize ExaTN server:
 success = exatn::sync(); assert(success);

 //Compute 2-norms (synchronously):
 double norm2 = 0.0;
 success = exatn::computeNorm2Sync("Z2",norm2); assert(success);
 std::cout << "Z2 2-norm = " << std::scientific << norm2 << std::endl << std::flush;
 norm2 = 0.0;
 success = exatn::computeNorm2Sync("Y2",norm2); assert(success);
 std::cout << "Y2 2-norm = " << std::scientific << norm2 << std::endl << std::flush;
 norm2 = 0.0;
 success = exatn::computeNorm2Sync("ENERGY",norm2); assert(success);
 std::cout << "ENERGY 2-norm = " << std::scientific << norm2 << std::endl << std::flush;

 //Retrieve scalar ENERGY:
 auto local_copy = exatn::getLocalTensor("ENERGY"); assert(local_copy);
 const exatn::TensorDataType<TENS_ELEM_TYPE>::value * body_ptr;
 auto access_granted = local_copy->getDataAccessHostConst(&body_ptr); assert(access_granted);
 //Reference value: every element of Z2 is VI_RANGE^2*(1e-4)*(1e-3)*0.5 and every element
 //of Y2 is VI_RANGE^2*(2e-4)*(2e-3)*1.0; ENERGY sums their squares over all
 //VI_RANGE^2*OC_RANGE^2 elements, each sum scaled by 0.25.
 std::cout << "ENERGY value = " << *body_ptr << " VS correct value of "
           << std::pow(std::pow(double{VI_RANGE},2)*(1e-4)*(1e-3)*0.5,2)*std::pow(double{VI_RANGE},2)*std::pow(double{OC_RANGE},2)*0.25
            + std::pow(std::pow(double{VI_RANGE},2)*(2e-4)*(2e-3)*1.0,2)*std::pow(double{VI_RANGE},2)*std::pow(double{OC_RANGE},2)*0.25
           << std::endl << std::flush;
 body_ptr = nullptr;

 //Retrieve scalar via talsh::Tensor::View:
 auto scalar_view = local_copy->getSliceView<exatn::TensorDataType<TENS_ELEM_TYPE>::value>(); //scalar view
 std::cout << "ENERGY value (via tensor view) = " << scalar_view[std::initializer_list<int>{}] << std::endl;
 local_copy.reset();

 //Access a tensor element directly via talsh::Tensor::View:
 local_copy = exatn::getLocalTensor("Z2"); assert(local_copy);
 auto tensor_view = local_copy->getSliceView<exatn::TensorDataType<TENS_ELEM_TYPE>::value>(); //full tensor view
 std::cout << "Z2[2,3,1,0] = " << tensor_view[{2,3,1,0}] << std::endl;
 local_copy.reset();

 //Synchronize ExaTN server:
 success = exatn::sync(); assert(success);

 //Destroy all tensors (reverse order of creation):
 for(const char * name: {"ENERGY","W2","H2","S2","T2","Y2","Z2"}){
  success = exatn::destroyTensor(name); assert(success);
 }
 z2.reset();

 //Synchronize ExaTN server:
 success = exatn::sync(all_processes); assert(success);
 //exatn::resetLoggingLevel(0,0);
}
#endif

393
#ifdef EXATN_TEST2
394
//Multi-process example: every process creates and initializes the same tensors, runs
//scaling, accumulation and contraction, evaluates a four-tensor network into Z0 under
//a reduced per-process memory limit, and prints the 2-norm of Z0. Rank 0 then computes
//the scalar S0 by contraction and S0 is replicated to all processes.
TEST(NumServerTester, ParallelExaTN)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorSignature;
 using exatn::TensorNetwork;
 using exatn::TensorOperator;
 using exatn::TensorExpansion;
 using exatn::TensorElementType;

 //exatn::resetLoggingLevel(1,2); //debug

 bool ok = true;

 //For multi-process execution:
 auto my_rank = exatn::getProcessRank(); //global rank of the current process
 exatn::ProcessGroup myself(exatn::getCurrentProcessGroup()); //process group containing only the current process
 exatn::ProcessGroup all_processes(exatn::getDefaultProcessGroup()); //group of all processes

 //All processes: Declare and then separately create a tensor:
 auto z0 = exatn::makeSharedTensor("Z0",TensorShape{16,16,16,16}); //declares a tensor Z0[16,16,16,16] with no storage
 ok = exatn::createTensor(z0,TensorElementType::REAL32); assert(ok); //allocates storage for tensor Z0

 //All processes: Create tensors in one shot (with storage):
 ok = exatn::createTensor("T0",TensorElementType::REAL32,TensorShape{16,16}); assert(ok);
 for(const char * name: {"T1","T2","T3","T4"}){
  ok = exatn::createTensor(name,TensorElementType::REAL32,TensorShape{32,16,32,32}); assert(ok);
 }

 //All processes: Initialize tensors to a scalar value:
 {
  const std::pair<const char *, double> init_values[] = {
   {"Z0",0.0}, {"T0",0.0},
   {"T1",0.01}, {"T2",0.001}, {"T3",0.0001}, {"T4",0.00001}
  };
  for(const auto & entry: init_values){
   ok = exatn::initTensor(entry.first,entry.second); assert(ok);
  }
 }

 //All processes: Scale a tensor by a scalar:
 ok = exatn::scaleTensor("T3",0.5); assert(ok);

 //All processes: Accumulate a scaled tensor into another tensor:
 ok = exatn::addTensors("T2(i,j,k,l)+=T4(i,j,k,l)",0.25); assert(ok);

 //All processes: Contract two tensors (scaled by a scalar) and accumulate the result into another tensor:
 ok = exatn::contractTensors("T0(i,j)+=T2(c,i,d,e)*T3(d,j,e,c)",0.125); assert(ok);

 //All processes: Evaluate the entire tensor network in one shot with a given memory limit per process:
 std::cout << "Original memory limit per process = " << all_processes.getMemoryLimitPerProcess() << std::endl;
 all_processes.resetMemoryLimitPerProcess(exatn::getMemoryBufferSize()/8);
 std::cout << "Corrected memory limit per process = " << all_processes.getMemoryLimitPerProcess() << std::endl;
 ok = exatn::evaluateTensorNetwork(all_processes,"FullyConnectedStar",
       "Z0(i,j,k,l)+=T1(d,i,a,e)*T2(a,j,b,f)*T3(b,k,c,e)*T4(c,l,d,f)");
 assert(ok);
 //All processes: Synchronize on the computed output tensor Z0:
 ok = exatn::sync(all_processes,"Z0"); assert(ok);

 //All processes: Compute 2-norm of Z0 (synchronously):
 double z0_norm2 = 0.0;
 ok = exatn::computeNorm2Sync("Z0",z0_norm2); assert(ok);
 std::cout << "Z0 2-norm = " << z0_norm2 << std::endl << std::flush;

 //Process 0: Compute 2-norm of Z0 by a tensor contraction (synchronously):
 if(my_rank == 0){
  ok = exatn::createTensor("S0",TensorElementType::REAL32); assert(ok);
  ok = exatn::initTensor("S0",0.0); assert(ok);
  ok = exatn::contractTensorsSync("S0()+=Z0+(i,j,k,l)*Z0(i,j,k,l)",1.0); assert(ok);
 }
 //All processes: Replicate tensor S0 to all processes (synchronously):
 ok = exatn::replicateTensorSync(all_processes,"S0",0); assert(ok);
 //All processes: Retrieve a copy of tensor S0 locally:
 auto s0_local = exatn::getLocalTensor("S0");

 //All processes: Destroy all tensors:
 for(const char * name: {"S0","T4","T3","T2","T1","T0","Z0"}){
  ok = exatn::destroyTensor(name); assert(ok);
 }
 z0.reset();

 //All processes: Synchronize ExaTN server:
 ok = exatn::sync(all_processes); assert(ok);
 //exatn::resetLoggingLevel(0,0);
}
#endif
482

483
#ifdef EXATN_TEST3
484
485
//Minimal end-to-end example: creates eight small REAL64 tensors, fills each with a
//constant, evaluates one closed tensor network into the scalar Z0, and prints the
//result next to the expected value.
TEST(NumServerTester, superEasyNumServer)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorElementType;

 //exatn::resetLoggingLevel(1,2); //debug

 //Example of tensor network processing:
 //3-site MPS closure with 2-body Hamiltonian applied to sites 0 and 1:
 //Z0() = T0(a,b) * T1(b,c,d) * T2(d,e) * H0(a,c,f,g) * S0(f,h) * S1(h,g,i) * S2(i,e)
 // 0      1         2           3         4             5         6           7  <-- tensor id

 //Create tensors:
 auto created = false;
 created = exatn::createTensor("Z0",TensorElementType::REAL64); assert(created);
 created = exatn::createTensor("T0",TensorElementType::REAL64,TensorShape{2,2}); assert(created);
 created = exatn::createTensor("T1",TensorElementType::REAL64,TensorShape{2,2,2}); assert(created);
 created = exatn::createTensor("T2",TensorElementType::REAL64,TensorShape{2,2}); assert(created);
 created = exatn::createTensor("H0",TensorElementType::REAL64,TensorShape{2,2,2,2}); assert(created);
 created = exatn::createTensor("S0",TensorElementType::REAL64,TensorShape{2,2}); assert(created);
 created = exatn::createTensor("S1",TensorElementType::REAL64,TensorShape{2,2,2}); assert(created);
 created = exatn::createTensor("S2",TensorElementType::REAL64,TensorShape{2,2}); assert(created);

 //Initialize tensors:
 auto initialized = false;
 initialized = exatn::initTensor("Z0",0.0); assert(initialized);
 initialized = exatn::initTensor("T0",0.01); assert(initialized);
 initialized = exatn::initTensor("T1",0.01); assert(initialized);
 initialized = exatn::initTensor("T2",0.01); assert(initialized);
 initialized = exatn::initTensor("H0",0.01); assert(initialized);
 initialized = exatn::initTensor("S0",0.01); assert(initialized);
 initialized = exatn::initTensor("S1",0.01); assert(initialized);
 initialized = exatn::initTensor("S2",0.01); assert(initialized);

 //Evaluate a tensor network (the outcome is checked like every other call here):
 auto evaluated = false;
 evaluated = exatn::evaluateTensorNetwork("{0,1} 3-site MPS closure",
  "Z0() = T0(a,b) * T1(b,c,d) * T2(d,e) * H0(a,c,f,g) * S0(f,h) * S1(h,g,i) * S2(i,e)");
 assert(evaluated);

 //Sync all operations on Z0:
 auto synced = false;
 synced = exatn::sync("Z0"); assert(synced);

 //Retrieve the result (Z0):
 auto access_granted = false;
 auto talsh_tensor = exatn::getLocalTensor("Z0"); assert(talsh_tensor); //must be valid before it is dereferenced
 const double * body_ptr;
 access_granted = talsh_tensor->getDataAccessHostConst(&body_ptr); assert(access_granted);
 //Expected value: 7 input tensors of 0.01 give 1e-14 per term, summed over
 //9 contracted indices of extent 2 (2^9 = 512 terms).
 std::cout << "Final Z0 value = " << *body_ptr << " VS correct value of " << 512e-14 << std::endl;
 body_ptr = nullptr;

 //Destroy tensors:
 auto destroyed = false;
 destroyed = exatn::destroyTensor("S2"); assert(destroyed);
 destroyed = exatn::destroyTensor("S1"); assert(destroyed);
 destroyed = exatn::destroyTensor("S0"); assert(destroyed);
 destroyed = exatn::destroyTensor("H0"); assert(destroyed);
 destroyed = exatn::destroyTensor("T2"); assert(destroyed);
 destroyed = exatn::destroyTensor("T1"); assert(destroyed);
 destroyed = exatn::destroyTensor("T0"); assert(destroyed);
 destroyed = exatn::destroyTensor("Z0"); assert(destroyed);
 //Grab a beer!
}
548
#endif
549

550
#ifdef EXATN_TEST4
551
552
//Builds a 3-qubit circuit (H on every qubit, then CNOT on qubits 1 and 2) as a tensor
//network, builds a second network that appends the conjugated gates in reverse order
//and collapses isometries, prints both networks, and evaluates the original circuit.
TEST(NumServerTester, circuitNumServer)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorNetwork;
 using exatn::TensorElementType;

 //exatn::resetLoggingLevel(1,2); //debug

 //Quantum Circuit:
 //Q0----H---------
 //Q1----H----C----
 //Q2----H----N----

 //Define the initial qubit state vector:
 std::vector<std::complex<double>> qzero {
  {1.0,0.0}, {0.0,0.0}
 };

 //Define quantum gates:
 //NOTE(review): the entries below are +/-1 with no 1/sqrt(2) factor, yet "H" is
 //registered as an isometry further down -- confirm whether the normalization is
 //intentionally omitted.
 std::vector<std::complex<double>> hadamard {
  {1.0,0.0}, {1.0,0.0},
  {1.0,0.0}, {-1.0,0.0}
 };
 std::vector<std::complex<double>> cnot {
  {1.0,0.0}, {0.0,0.0}, {0.0,0.0}, {0.0,0.0},
  {0.0,0.0}, {1.0,0.0}, {0.0,0.0}, {0.0,0.0},
  {0.0,0.0}, {0.0,0.0}, {0.0,0.0}, {1.0,0.0},
  {0.0,0.0}, {0.0,0.0}, {1.0,0.0}, {0.0,0.0}
 };

 //Create qubit tensors:
 auto created = false;
 created = exatn::createTensor("Q0",TensorElementType::COMPLEX64,TensorShape{2}); assert(created);
 created = exatn::createTensor("Q1",TensorElementType::COMPLEX64,TensorShape{2}); assert(created);
 created = exatn::createTensor("Q2",TensorElementType::COMPLEX64,TensorShape{2}); assert(created);

 //Create gate tensors:
 auto registered = false;
 created = exatn::createTensor("H",TensorElementType::COMPLEX64,TensorShape{2,2}); assert(created);
 registered = exatn::registerTensorIsometry("H",{0},{1}); assert(registered);
 created = exatn::createTensor("CNOT",TensorElementType::COMPLEX64,TensorShape{2,2,2,2}); assert(created);
 registered = exatn::registerTensorIsometry("CNOT",{0,1},{2,3}); assert(registered);

 //Initialize qubit tensors to zero state:
 auto initialized = false;
 initialized = exatn::initTensorData("Q0",qzero); assert(initialized);
 initialized = exatn::initTensorData("Q1",qzero); assert(initialized);
 initialized = exatn::initTensorData("Q2",qzero); assert(initialized);

 //Initialize necessary gate tensors:
 initialized = exatn::initTensorData("H",hadamard); assert(initialized);
 initialized = exatn::initTensorData("CNOT",cnot); assert(initialized);

 auto synced = false;

 {//Open a new scope:
  //Build a tensor network from the quantum circuit:
  TensorNetwork circuit("QuantumCircuit");
  auto appended = false;
  appended = circuit.appendTensor(1,exatn::getTensor("Q0"),{}); assert(appended);
  appended = circuit.appendTensor(2,exatn::getTensor("Q1"),{}); assert(appended);
  appended = circuit.appendTensor(3,exatn::getTensor("Q2"),{}); assert(appended);

  appended = circuit.appendTensorGate(4,exatn::getTensor("H"),{0}); assert(appended);
  appended = circuit.appendTensorGate(5,exatn::getTensor("H"),{1}); assert(appended);
  appended = circuit.appendTensorGate(6,exatn::getTensor("H"),{2}); assert(appended);

  appended = circuit.appendTensorGate(7,exatn::getTensor("CNOT"),{1,2}); assert(appended);
  circuit.printIt(); //debug

  //Contract the circuit tensor network with its conjugate:
  TensorNetwork inverse(circuit);
  inverse.rename("InverseCircuit");
  appended = inverse.appendTensorGate(8,exatn::getTensor("CNOT"),{1,2},true); assert(appended);
  appended = inverse.appendTensorGate(9,exatn::getTensor("H"),{2},true); assert(appended);
  appended = inverse.appendTensorGate(10,exatn::getTensor("H"),{1},true); assert(appended);
  appended = inverse.appendTensorGate(11,exatn::getTensor("H"),{0},true); assert(appended);
  auto collapsed = inverse.collapseIsometries(); assert(collapsed);
  inverse.printIt(); //debug

  //Evaluate the quantum circuit expressed as a tensor network:
  auto evaluated = false;
  evaluated = exatn::evaluateSync(circuit); assert(evaluated);

  //Synchronize (result is checked, as in the other tests in this file):
  synced = exatn::sync(); assert(synced);
 }

 //Destroy all tensors:
 auto destroyed = false;
 destroyed = exatn::destroyTensor("CNOT"); assert(destroyed);
 destroyed = exatn::destroyTensor("H"); assert(destroyed);
 destroyed = exatn::destroyTensor("Q2"); assert(destroyed);
 destroyed = exatn::destroyTensor("Q1"); assert(destroyed);
 destroyed = exatn::destroyTensor("Q0"); assert(destroyed);

 //Synchronize:
 synced = exatn::sync(); assert(synced);
 //Grab a beer!
}
650
#endif
#ifdef EXATN_TEST5
//Builds a one-qubit circuit (qubit tensor Q0 followed by the gate tensor U) together with
//a conjugated copy of it, evaluates both tensor networks and prints the elements of each
//result tensor to stdout. All steps are checked via assert().
TEST(NumServerTester, circuitConjugateNumServer)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorNetwork;
 using exatn::TensorElementType;

 //exatn::resetLoggingLevel(1,2); //debug

 //Define the initial qubit state vector:
 std::vector<std::complex<double>> qzero {
  {1.0,0.0}, {0.0,0.0}
 };

 //Define quantum gates: *NEGATIVE* imaginary
 //(the off-diagonal elements carry a negative imaginary part, so the printed
 // results of the original and conjugated circuits can be told apart)
 std::vector<std::complex<double>> unitary {
  {1.0, 0.0}, {0.0,-1.0},
  {0.0,-1.0}, {1.0, 0.0}
 };

 //Create tensors:
 bool created = exatn::createTensor("Q0", TensorElementType::COMPLEX64,TensorShape{2}); assert(created);
 created = exatn::createTensor("U", TensorElementType::COMPLEX64, TensorShape{2,2}); assert(created);
 bool registered = exatn::registerTensorIsometry("U", {0}, {1}); assert(registered);

 //Initialize tensors:
 bool initialized = exatn::initTensorData("Q0", qzero); assert(initialized);
 initialized = exatn::initTensorData("U", unitary); assert(initialized);

 {//Open a new scope:
  //Build a tensor network representing the quantum circuit:
  TensorNetwork circuit("QuantumCircuit");
  bool appended = circuit.appendTensor(1, exatn::getTensor("Q0"), {}); assert(appended);
  appended = circuit.appendTensorGate(2, exatn::getTensor("U"), {0}); assert(appended);
  circuit.printIt(); //debug

  //Build a conjugated tensor network:
  TensorNetwork conj_circuit(circuit);
  conj_circuit.rename("ConjugatedCircuit");
  conj_circuit.conjugate();
  conj_circuit.printIt(); //debug

  bool evaluated = exatn::evaluateSync(circuit); assert(evaluated);
  evaluated = exatn::evaluateSync(conj_circuit); assert(evaluated);

  //Synchronize:
  exatn::sync();

  //Prints all elements of the local copy of the named tensor as "[e0e1...]";
  //nothing is printed if host access to the tensor body cannot be obtained:
  const auto print_local_tensor = [](const std::string & tensor_name){
   auto talsh_tensor = exatn::getLocalTensor(tensor_name);
   const std::complex<double> * body_ptr = nullptr;
   if(talsh_tensor->getDataAccessHostConst(&body_ptr)){
    std::cout << "[";
    for(std::size_t i = 0; i < talsh_tensor->getVolume(); ++i){
     std::cout << body_ptr[i];
    }
    std::cout << "]\n";
   }
  };

  //Retrieve the results (tensor 0 of each network is queried for the result name):
  print_local_tensor(circuit.getTensor(0)->getName());
  print_local_tensor(conj_circuit.getTensor(0)->getName());
 }

 //Destroy tensors:
 bool destroyed = exatn::destroyTensor("U"); assert(destroyed);
 destroyed = exatn::destroyTensor("Q0"); assert(destroyed);

 //Synchronize:
 exatn::sync();
 //Grab a coffee!
}
#endif
#ifdef EXATN_TEST6
//Builds an nbQubits-qubit circuit with one Hadamard gate per qubit, appends the conjugated
//Hadamard gates in reverse qubit order, collapses isometries, closes the network with the
//conjugated qubit register and evaluates the resulting rank-0 tensor network, printing the
//first element of the result. All steps are checked via assert().
TEST(NumServerTester, largeCircuitNumServer)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorNetwork;
 using exatn::TensorElementType;

 //exatn::resetLoggingLevel(1,2); //debug

 //Quantum Circuit (one Hadamard gate on each of the nbQubits qubits):
 //Q0---H-----
 //Q1---H-----
 // |
 //Q9---H-----

 const unsigned int nbQubits = 10;

 //Define the initial qubit state vector:
 std::vector<std::complex<double>> qzero {
  {1.0,0.0}, {0.0,0.0}
 };

 //Define quantum gates:
 //NOTE(review): the Hadamard entries carry no 1/sqrt(2) factor; since "H" is registered
 //as an isometry below, presumably collapseIsometries() cancels every H against its
 //conjugate so the normalization never enters the result -- confirm.
 std::vector<std::complex<double>> hadamard {
  {1.0,0.0}, {1.0,0.0},
  {1.0,0.0}, {-1.0,0.0}
 };

 //Create qubit tensors:
 for (unsigned int i = 0; i < nbQubits; ++i) {
  const bool created = exatn::createTensor("Q" + std::to_string(i),TensorElementType::COMPLEX64,TensorShape{2});
  assert(created);
 }

 //Create gate tensors:
 {
  const bool created = exatn::createTensor("H",TensorElementType::COMPLEX64,TensorShape{2,2});
  assert(created);
  const bool registered =(exatn::registerTensorIsometry("H",{0},{1}));
  assert(registered);
 }

 //Initialize qubit tensors to zero state:
 for (unsigned int i = 0; i < nbQubits; ++i) {
  const bool initialized = exatn::initTensorData("Q" + std::to_string(i),qzero);
  assert(initialized);
 }

 //Initialize necessary gate tensors:
 {
  const bool initialized = exatn::initTensorData("H",hadamard);
  assert(initialized);
 }

 {//Open a new scope:
  //Build a tensor network from the quantum circuit:
  TensorNetwork circuit("QuantumCircuit");
  unsigned int tensorCounter = 1; //id assigned to the next appended tensor

  // Qubit tensors:
  for (unsigned int i = 0; i < nbQubits; ++i) {
   const bool appended = circuit.appendTensor(tensorCounter, exatn::getTensor("Q" + std::to_string(i)),{});
   assert(appended);
   ++tensorCounter;
  }

  // Copy the qubit register (taken before any gate is appended);
  // its conjugate is used below to fully close the entire network
  TensorNetwork qubitReg(circuit);
  qubitReg.rename("QubitKet");

  // Hadamard tensors:
  for (unsigned int i = 0; i < nbQubits; ++i) {
   const bool appended = circuit.appendTensorGate(tensorCounter,exatn::getTensor("H"),{i});
   assert(appended);
   ++tensorCounter;
  }

  circuit.printIt(); //debug

  //Contract the circuit tensor network with its conjugate:
  TensorNetwork inverse(circuit);
  inverse.rename("InverseCircuit");

  // Conjugated Hadamard tensors, appended from the last qubit down to the first:
  for (unsigned int i = 0; i < nbQubits; ++i) {
   const bool appended = inverse.appendTensorGate(tensorCounter,exatn::getTensor("H"),{nbQubits - i - 1}, true);
   assert(appended);
   ++tensorCounter;
  }

  const bool collapsed = inverse.collapseIsometries();
  assert(collapsed);

  inverse.printIt(); //debug

  {// Closing the tensor network with the bra
   auto bra = qubitReg;
   bra.conjugate();
   bra.rename("QubitBra");
   // Output leg i of the inverse circuit is paired with output leg i of the bra:
   std::vector<std::pair<unsigned int, unsigned int>> pairings;
   pairings.reserve(nbQubits);
   for (unsigned int i = 0; i < nbQubits; ++i) {
    pairings.emplace_back(i, i);
   }
   const bool appended = inverse.appendTensorNetwork(std::move(bra), pairings);
   assert(appended);
  }

  inverse.printIt(); //debug

  {
   const bool rankEqualZero = (inverse.getRank() == 0);
   assert(rankEqualZero);
  }

  //Evaluate the quantum circuit expressed as a tensor network:
  // NOTE: We evaluate the *inverse* tensor which should be fully-closed.
  const bool evaluated = exatn::evaluateSync(inverse);
  assert(evaluated);

  //Synchronize:
  exatn::sync();

  //Print the first element of the result tensor, if its body is accessible on Host:
  auto talsh_tensor = exatn::getLocalTensor(inverse.getTensor(0)->getName());
  const std::complex<double>* body_ptr = nullptr;
  if (talsh_tensor->getDataAccessHostConst(&body_ptr)) {
   std::cout << "Final result is " << *body_ptr << "\n";
  }
 }

 //Destroy all tensors:
 {
  const bool destroyed = exatn::destroyTensor("H");
  assert(destroyed);
 }

 for (unsigned int i = 0; i < nbQubits; ++i) {
  const bool destroyed = exatn::destroyTensor("Q" + std::to_string(i));
  assert(destroyed);
 }

 //Synchronize:
 exatn::sync();
 //Grab a coffee!
}
#endif
#ifdef EXATN_TEST7
879
880
881
882
883
884
885
TEST(NumServerTester, Sycamore8NumServer)
{
 using exatn::Tensor;
 using exatn::TensorShape;
 using exatn::TensorNetwork;
 using exatn::TensorElementType;

886
 //exatn::resetLoggingLevel(1,2); //debug
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911

 const unsigned int num_qubits = 53;
 const unsigned int num_gates = 172; //total number of gates is 172
 std::vector<std::pair<unsigned int, unsigned int>> sycamore_8_cnot
 {
 {1,4},{3,7},{5,9},{6,13},{8,15},{10,17},{12,21},{14,23},{16,25},{18,27},{20,30},
 {22,32},{24,34},{26,36},{29,37},{31,39},{33,41},{35,43},{38,44},{40,46},{42,48},
 {45,49},{47,51},{50,52},{0,3},{2,6},{4,8},{7,14},{9,16},{11,20},{13,22},{15,24},
 {17,26},{19,29},{21,31},{23,33},{25,35},{30,38},{32,40},{34,42},{39,45},{41,47},
 {46,50},{0,1},{2,3},{4,5},{7,8},{9,10},{11,12},{13,14},{15,16},{17,18},{19,20},
 {21,22},{23,24},{25,26},{28,29},{30,31},{32,33},{34,35},{37,38},{39,40},{41,42},
 {44,45},{46,47},{49,50},{3,4},{6,7},{8,9},{12,13},{14,15},{16,17},{20,21},{22,23},
 {24,25},{26,27},{29,30},{31,32},{33,34},{35,36},{38,39},{40,41},{42,43},{45,46},
 {47,48},{50,51},{0,1},{2,3},{4,5},{7,8},{9,10},{11,12},{13,14},{15,16},{17,18},
 {19,20},{21,22},{23,24},{25,26},{28,29},{30,31},{32,33},{34,35},{37,38},{39,40},
 {41,42},{44,45},{46,47},{49,50},{3,4},{6,7},{8,9},{12,13},{14,15},{16,17},{20,21},
 {22,23},{24,25},{26,27},{29,30},{31,32},{33,34},{35,36},{38,39},{40,41},{42,43},
 {45,46},{47,48},{50,51},{1,4},{3,7},{5,9},{6,13},{8,15},{10,17},{12,21},{14,23},
 {16,25},{18,27},{20,30},{22,32},{24,34},{26,36},{29,37},{31,39},{33,41},{35,43},
 {38,44},{40,46},{42,48},{45,49},{47,51},{50,52},{0,3},{2,6},{4,8},{7,14},{9,16},
 {11,20},{13,22},{15,24},{17,26},{19,29},{21,31},{23,33},{25,35},{30,38},{32,40},
 {34,42},{39,45},{41,47},{46,50}
 };
 assert(num_gates <= sycamore_8_cnot.size());

912
913
 std::cout << "Building the circuit ... " << std::flush;

914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
 TensorNetwork circuit("Sycamore8_CNOT");
 unsigned int tensor_counter = 0;

 //Left qubit tensors:
 unsigned int first_q_tensor = tensor_counter + 1;
 for(unsigned int i = 0; i < num_qubits; ++i){
  bool success = circuit.appendTensor(++tensor_counter,
                                      std::make_shared<Tensor>("Q"+std::to_string(i),TensorShape{2}),
                                      {});
  assert(success);
 }
 unsigned int last_q_tensor = tensor_counter;

 //CNOT gates:
 auto cnot = std::make_shared<Tensor>("CNOT",TensorShape{2,2,2,2});
 for(unsigned int i = 0; i < num_gates; ++i){
  bool success = circuit.appendTensorGate(++tensor_counter,
                                          cnot,
                                          {sycamore_8_cnot[i].first,sycamore_8_cnot[i].second});
  assert(success);
 }

 //Right qubit tensors:
 unsigned int first_p_tensor = tensor_counter + 1;
 for(unsigned int i = 0; i < num_qubits; ++i){
  bool success = circuit.appendTensor(++tensor_counter,
                                      std::make_shared<Tensor>("P"+std::to_string(i),TensorShape{2}),
                                      {{0,0}});
  assert(success);
 }
 unsigned int last_p_tensor = tensor_counter;
945
 std::cout << "Done\n" << std::flush;
946

947
 std::cout << "Simplifying the circuit ... " << std::flush;
948
949
950
951
952
953
954
955
956
957
958
959
960
 //Merge qubit tensors into adjacent CNOTs:
 for(unsigned int i = first_p_tensor; i <= last_p_tensor; ++i){
  const auto & tensor_legs = *(circuit.getTensorConnections(i));
  const auto other_tensor_id = tensor_legs[0].getTensorId();
  bool success = circuit.mergeTensors(other_tensor_id,i,++tensor_counter);
  assert(success);
 }
 for(unsigned int i = first_q_tensor; i <= last_q_tensor; ++i){
  const auto & tensor_legs = *(circuit.getTensorConnections(i));
  const auto other_tensor_id = tensor_legs[0].getTensorId();
  bool success = circuit.mergeTensors(other_tensor_id,i,++tensor_counter);
  assert(success);
 }
961
 std::cout << "Done\n" << std::flush;
962
963
964
965

 circuit.printIt(); //debug

 //Generate the list of tensor operations for the circuit:
966
 std::cout << "Generating the list of tensor operations for the circuit ... " << std::flush;
967
 auto & operations = circuit.getOperationList("metis",true);
968
 std::cout << "Done\n" << std::flush;
969
 unsigned int max_rank = 0;
970
 std::cout << "Total FMA flop count = " << circuit.getFMAFlops()
971
           << ": Max intermdediate presence volume = " << circuit.getMaxIntermediatePresenceVolume()
972
973
           << ": Max intermdediate volume = " << circuit.getMaxIntermediateVolume(&max_rank)
           << ": Max intermdediate rank = " << max_rank << std::endl;
974
975

 std::cout << "Splitting some internal indices to reduce the size of intermediates ... " << std::flush;
976
 circuit.splitIndices(static_cast<std::size_t>(circuit.getMaxIntermediateVolume()/16.0));
977
 std::cout << "Done\n" << std::flush;
978
 circuit.printSplitIndexInfo();
979

980
981
982
 std::size_t num_parts = 2;
 double imbalance = 1.001;
 std::size_t edge_cut = 0, num_cross_edges = 0;
983
984
 std::vector<std::pair<std::size_t,std::vector<std::size_t>>> parts;
 bool success = circuit.partition(num_parts,imbalance,parts,&edge_cut,&num_cross_edges); assert(success);
985
 std::cout << "Partitioned tensor network into " << num_parts
986
           << " parts with tolerated weight imbalance " << imbalance
987
988
           << " achieving edge cut of " << edge_cut
           << " with total cross edges = " << num_cross_edges << ":\n" << std::flush;
989
 std::size_t total_weight = 0;
990
 std::size_t total_vertices = 0;
991
992
993
994
995
996
 for(std::size_t i = 0; i < parts.size(); ++i){
  std::cout << "Partition " << i << " of size " << parts[i].second.size()
            << " with weight " << parts[i].first << ":\n";
  for(const auto & vertex: parts[i].second) std::cout << " " << vertex;
  total_weight += parts[i].first;
  total_vertices += parts[i].second.size();
997
998
  std::cout << std::endl;
 }
999
 std::cout << "Total weight of vertices in all partitions = " << total_weight << std::endl;
1000
 std::cout << "Total number of vertices in all partitions = " << total_vertices << std::endl;