Enabled distributed execution in CuQuantum executor, still needs allreduce and testing ... (ad725f47) · Commits · ORNL Quantum Computing Institute / exatn

src/exatn/tests/NumServerTester.cpp

+69 −7

Original line number	Diff line number	Diff line
		@@ -18,14 +18,15 @@
		#include "errors.hpp"

		//Test activation:
		/*#define EXATN_TEST0
		/*
		#define EXATN_TEST0
		#define EXATN_TEST1
		#define EXATN_TEST2
		#define EXATN_TEST3
		#define EXATN_TEST4
		#define EXATN_TEST5*/
		#define EXATN_TEST5
		#define EXATN_TEST6
		/*#define EXATN_TEST7
		#define EXATN_TEST7
		#define EXATN_TEST8
		#define EXATN_TEST9
		#define EXATN_TEST10
		@@ -44,13 +45,15 @@
		#define EXATN_TEST23
		#define EXATN_TEST24
		#define EXATN_TEST25
		#define EXATN_TEST26*/
		#define EXATN_TEST26
		//#define EXATN_TEST27 //requires input file from source
		//#define EXATN_TEST28 //requires input file from source
		//#define EXATN_TEST29
		//#define EXATN_TEST30
		#define EXATN_TEST29
		#define EXATN_TEST30
		//#define EXATN_TEST31 //requires input file from source
		//#define EXATN_TEST32
		*/
		#define EXATN_TEST32
		//#define EXATN_TEST33


		#ifdef EXATN_TEST0
		@@ -3774,6 +3777,65 @@ TEST(NumServerTester, ExcitedMCVQE) {
		#endif

		#ifdef EXATN_TEST32
		TEST(NumServerTester, CuTensorNet) {
		 //Repeatedly evaluates the tensor network D += A * B * C and prints
		 //the flop rate measured for each repetition.
		 using exatn::TensorShape;
		 using exatn::TensorSignature;
		 using exatn::Tensor;
		 using exatn::TensorNetwork;
		 using exatn::TensorExpansion;
		 using exatn::TensorOperator;
		 using exatn::TensorElementType;
		 using exatn::TensorRange;

		 const auto TENS_ELEM_TYPE = TensorElementType::REAL32;
		 const int NUM_REPEATS = 3; //number of timed evaluations

		 //exatn::resetLoggingLevel(1,2); //debug

		 bool success = true;

		 //Allocate the input tensors (A,B,C) and the output tensor (D):
		 success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{96,64,64,96});
		 assert(success);
		 success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{96,64,64});
		 assert(success);
		 success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{64,96,64});
		 assert(success);
		 success = exatn::createTensor("D",TENS_ELEM_TYPE,TensorShape{96,64,96,64});
		 assert(success);

		 //Random values for the inputs, zero for the output:
		 success = exatn::initTensorRnd("A");
		 assert(success);
		 success = exatn::initTensorRnd("B");
		 assert(success);
		 success = exatn::initTensorRnd("C");
		 assert(success);
		 success = exatn::initTensor("D",0.0);
		 assert(success);

		 //Evaluate the tensor network NUM_REPEATS times, timing each pass:
		 for(int repeat = 0; repeat < NUM_REPEATS; ++repeat){
		  //Synchronize before starting the timer:
		  success = exatn::sync();
		  assert(success);
		  std::cout << "D(m,x,n,y)+=A(m,h,k,n)B(u,k,h)C(x,u,y): ";
		  const auto flops_before = exatn::getTotalFlopCount();
		  auto time_start = exatn::Timer::timeInSecHR();
		  success = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)B(u,k,h)C(x,u,y)");
		  assert(success);
		  auto duration = exatn::Timer::timeInSecHR(time_start);
		  //Flop counter difference across this single evaluation:
		  const auto flops_done = exatn::getTotalFlopCount() - flops_before;
		  std::cout << "Performance = " << (flops_done / (1e9 * duration)) << " Gflop/s" << std::endl;
		 }

		 //Release the tensors in reverse order of creation:
		 success = exatn::sync();
		 assert(success);
		 success = exatn::destroyTensor("D");
		 assert(success);
		 success = exatn::destroyTensor("C");
		 assert(success);
		 success = exatn::destroyTensor("B");
		 assert(success);
		 success = exatn::destroyTensor("A");
		 assert(success);

		 //Final synchronization and logging reset:
		 success = exatn::sync();
		 assert(success);
		 exatn::resetLoggingLevel(0,0);
		 //Grab a beer!
		}
		#endif

		#ifdef EXATN_TEST33
		TEST(NumServerTester, TensorComposite) {
		using exatn::TensorShape;
		using exatn::TensorSignature;

src/runtime/executor/cuquantum/cuquantum_executor.cu

+7 −4

Original line number	Diff line number	Diff line
		/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
		REVISION: 2022/01/05
		REVISION: 2022/01/06

		Copyright (C) 2018-2022 Dmitry Lyakh
		Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
		@@ -108,8 +108,11 @@ struct TensorNetworkReq {
		};


		CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth):
		tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth)
		CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
		unsigned int pipeline_depth,
		unsigned int process_rank, unsigned int num_processes):
		tensor_data_access_func_(std::move(tensor_data_access_func)),
		pipe_depth_(pipeline_depth), process_rank_(process_rank), num_processes_(num_processes)
		{
		static_assert(std::is_same<cutensornetHandle_t,void>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void)");

		@@ -452,7 +455,7 @@ void CuQuantumExecutor::contractTensorNetwork(std::shared_ptr<TensorNetworkReq>
		&num_slices,sizeof(num_slices)));
		assert(num_slices > 0);
		HANDLE_CUDA_ERROR(cudaEventRecord(tn_req->compute_start,tn_req->stream));
		for(int64_t slice_id = 0; slice_id < num_slices; ++slice_id){
		for(int64_t slice_id = process_rank_; slice_id < num_slices; slice_id += num_processes_){
		HANDLE_CTN_ERROR(cutensornetContraction(gpu_attr_[gpu].second.cutn_handle,
		tn_req->comp_plan,
		tn_req->data_in,tn_req->data_out,

src/runtime/executor/cuquantum/cuquantum_executor.hpp

+8 −2

Original line number	Diff line number	Diff line
		/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
		REVISION: 2022/01/05
		REVISION: 2022/01/06

		Copyright (C) 2018-2022 Dmitry Lyakh
		Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
		@@ -41,7 +41,9 @@ class CuQuantumExecutor {
		public:

		CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
		unsigned int pipeline_depth);
		unsigned int pipeline_depth,
		unsigned int process_rank,
		unsigned int num_processes);

		CuQuantumExecutor(const CuQuantumExecutor &) = delete;
		CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete;
		@@ -98,6 +100,10 @@ protected:
		TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes}
		/** Pipeline depth **/
		const unsigned int pipe_depth_;
		/** Process rank **/
		const unsigned int process_rank_;
		/** Total number of parallel processes **/
		const unsigned int num_processes_;
		};

		} //namespace runtime

src/runtime/executor/graph_executors/lazy/graph_executor_lazy.cpp

+6 −3

Original line number	Diff line number	Diff line
		/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
		REVISION: 2022/01/05
		REVISION: 2022/01/06

		Copyright (C) 2018-2022 Dmitry Lyakh
		Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
		@@ -25,10 +25,11 @@ namespace runtime {

		void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
		const ParamConf & parameters,
		unsigned int num_processes,
		unsigned int process_rank,
		unsigned int global_process_rank)
		{
		TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank);
		TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,num_processes,process_rank,global_process_rank);
		#ifdef CUQUANTUM
		if(node_executor){
		cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(
		@@ -36,7 +37,9 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no
		void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size);
		return data_ptr;
		},
		CUQUANTUM_PIPELINE_DEPTH
		CUQUANTUM_PIPELINE_DEPTH,
		process_rank,
		num_processes
		);
		}
		#endif

src/runtime/executor/graph_executors/lazy/graph_executor_lazy.hpp

+2 −1

Original line number	Diff line number	Diff line
		/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
		REVISION: 2022/01/05
		REVISION: 2022/01/06

		Copyright (C) 2018-2022 Dmitry Lyakh, Alex McCaskey
		Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
		@@ -45,6 +45,7 @@ public:
		/** Sets/resets the DAG node executor (tensor operation executor). **/
		virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
		const ParamConf & parameters,
		unsigned int num_processes,
		unsigned int process_rank,
		unsigned int global_process_rank) override;