Commit 12b87ac9 authored by Dmitry I. Lyakh

Fixed a few bugs; memory corruption still present ...


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent da2a00d4
/** ExaTN::Numerics: General client header (free function API)
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -866,12 +866,12 @@ inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group
/** Synchronizes all outstanding update operations on a given tensor specified by
its symbolic name. If ProcessGroup is not provided, defaults to the local process.**/
inline bool sync(const std::string & name, //in: tensor name
bool wait = true) //in: wait versus test for completion
bool wait) //in: wait versus test for completion
{return numericalServer->sync(name,wait);}
inline bool sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
const std::string & name, //in: tensor name
bool wait = true) //in: wait versus test for completion
bool wait) //in: wait versus test for completion
{return numericalServer->sync(process_group,name,wait);}
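Reviewer note: with the signature change in this hunk the free-function sync() appears to drop its default wait = true argument, so callers now pass the flag explicitly. A minimal usage sketch (assumes ExaTN has been initialized via the usual exatn.hpp client header and that a tensor named "T" already exists; both are assumptions, not taken from this diff):

```cpp
#include "exatn.hpp"  // assumed client header exposing the free-function API
#include <cassert>

void waitForTensorUpdates()
{
 // Block until all outstanding update operations on tensor "T" have completed:
 bool done = exatn::sync("T", true);
 assert(done);
 // With wait = false the call only tests for completion instead of blocking:
 bool completed = exatn::sync("T", false);
 (void)completed;
}
```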
......
/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
......@@ -198,10 +198,10 @@ void NumServer::reconfigureTensorRuntime(const ParamConf & parameters,
void NumServer::switchComputationalBackend(const std::string & backend_name)
{
bool success = tensor_rt_->sync(); assert(success);
//bool success = sync(); assert(success);
if(logging_ > 0 && backend_name != comp_backend_){
logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "]: Switching computational backend to " << backend_name << std::endl << std::flush;
<< "]: Switched computational backend to " << backend_name << std::endl << std::flush;
}
if(backend_name == "default"){
comp_backend_ = backend_name;
......@@ -210,7 +210,8 @@ void NumServer::switchComputationalBackend(const std::string & backend_name)
comp_backend_ = backend_name;
#endif
}else{
std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: " << backend_name << std::endl;
std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: "
<< backend_name << std::endl << std::flush;
std::abort();
}
return;
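Reviewer note: per this hunk, switchComputationalBackend() first synchronizes the tensor runtime, logs the switch, and aborts on any backend name other than "default" or (when built with CUQUANTUM) "cuquantum". A usage sketch mirroring the CuTensorNet test further down (tensor names and the network label are placeholders; exatn.hpp is the assumed client header):

```cpp
#include "exatn.hpp"  // assumed client header
#include <cassert>

void contractOnGpu()
{
 exatn::switchComputationalBackend("cuquantum");     // aborts if the backend name is unknown
 bool ok = exatn::evaluateTensorNetwork("cuNet",
           "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)");
 assert(ok);
 ok = exatn::sync("D", true); assert(ok);            // wait for the cuQuantum execution to finish
 exatn::switchComputationalBackend("default");       // switch back to the regular executor
}
```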
......@@ -916,15 +917,16 @@ bool NumServer::submit(const ProcessGroup & process_group,
assert(local_rank < num_procs);
if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "]: Submitting tensor network <" << network->getName() << "> (" << network->getTensor(0)->getName()
<< ") for execution via cuQuantum by " << num_procs << " processes with memory limit "
<< process_group.getMemoryLimitPerProcess() << " bytes" << std::endl << std::flush;
<< ":" << getTensorNetworkHash(network) << ") for execution via cuQuantum by " << num_procs
<< " processes with memory limit " << process_group.getMemoryLimitPerProcess() << " bytes\n" << std::flush;
if(logging_ > 0) network->printItFile(logfile_);
const auto exec_handle = tensor_rt_->submit(network,process_group.getMPICommProxy(),num_procs,local_rank);
bool success = (exec_handle != 0);
if(success){
auto res = tn_exec_handles_.emplace(std::make_pair(network->getTensor(0)->getTensorHash(),exec_handle));
success = res.second;
if(success && logging_ > 0) logfile_ << "Number of submitted networks via cuQuantum = 1" << std::endl << std::flush;
if(success && logging_ > 0) logfile_ << "Execution handle of the submitted network via cuQuantum is "
<< exec_handle << std::endl << std::flush;
}
return success;
}
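Reviewer note: submit() records the pair (output tensor hash, execution handle) in tn_exec_handles_, and the updated sync() below looks the handle up by hash and erases it once the network has completed. A stripped-down sketch of that bookkeeping pattern (plain std::unordered_map with placeholder integer types; not the actual ExaTN definitions):

```cpp
#include <cstdint>
#include <unordered_map>

using TensorHash = std::uint64_t;   // stand-in for the tensor hash type
using ExecHandle = std::uint64_t;   // stand-in for TensorOpExecHandle

std::unordered_map<TensorHash, ExecHandle> exec_handles;

void onSubmit(TensorHash output_tensor, ExecHandle handle)
{
 exec_handles.emplace(output_tensor, handle);   // remember the in-flight network
}

bool onSync(TensorHash output_tensor)
{
 auto it = exec_handles.find(output_tensor);
 if(it == exec_handles.end()) return true;      // nothing outstanding for this tensor
 // ... synchronize the network identified by it->second here ...
 exec_handles.erase(it);                        // drop the entry once the network has completed
 return true;
}
```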
......@@ -1076,16 +1078,25 @@ bool NumServer::sync(const ProcessGroup & process_group, const Tensor & tensor,
{
bool success = true;
if(!process_group.rankIsIn(process_rank_)) return success; //process is not in the group: Do nothing
#ifdef CUQUANTUM
if(comp_backend_ == "cuquantum"){
auto iter = tn_exec_handles_.find(tensor.getTensorHash());
bool synced = (iter == tn_exec_handles_.end());
if(!synced) synced = tensor_rt_->syncNetwork(iter->second,wait);
return synced;
}
#endif
auto iter = tensors_.find(tensor.getName());
if(iter != tensors_.end()){
#ifdef CUQUANTUM
if(comp_backend_ == "cuquantum"){
auto cuter = tn_exec_handles_.find(iter->second->getTensorHash());
success = (cuter == tn_exec_handles_.end());
if(!success){
success = tensor_rt_->syncNetwork(cuter->second,wait);
if(success){
if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "]: Locally synchronized cuQuantum execution handle " << cuter->second << " via tensor <" << tensor.getName() << ">"
<< std::endl << std::flush;
tn_exec_handles_.erase(cuter);
}
}
return success;
}
#endif
if(iter->second->isComposite()){
auto composite_tensor = castTensorComposite(iter->second); assert(composite_tensor);
for(auto subtens = composite_tensor->begin(); subtens != composite_tensor->end(); ++subtens){
......
......@@ -3791,7 +3791,7 @@ TEST(NumServerTester, CuTensorNet) {
const int NUM_REPEATS = 1;
exatn::resetLoggingLevel(1,2); //debug
exatn::resetLoggingLevel(2,2); //debug
bool success = true;
......@@ -3807,23 +3807,24 @@ TEST(NumServerTester, CuTensorNet) {
success = exatn::initTensorRnd("C"); assert(success);
success = exatn::initTensor("D",0.0); assert(success);
exatn::switchComputationalBackend("default");
success = exatn::sync(); assert(success);
exatn::switchComputationalBackend("cuquantum");
//Contract tensor network:
int num_repeats = NUM_REPEATS;
while(--num_repeats >= 0){
success = exatn::sync(); assert(success);
std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): ";
auto flops = exatn::getTotalFlopCount();
auto time_start = exatn::Timer::timeInSecHR();
success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)");
assert(success);
success = exatn::sync("D"); assert(success);
success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success);
success = exatn::sync("D",true); assert(success);
auto duration = exatn::Timer::timeInSecHR(time_start);
flops = exatn::getTotalFlopCount() - flops;
std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl;
std::cout << "Duration = " << duration << " s; Performance = " << (flops / (1e9 * duration)) << " Gflop/s\n";
}
//std::this_thread::sleep_for(std::chrono::microseconds(1000000));
//Destroy tensors:
success = exatn::sync(); assert(success);
success = exatn::destroyTensor("D"); assert(success);
......
/** ExaTN: Tensor basic types and parameters
REVISION: 2021/10/15
REVISION: 2022/01/07
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
#ifndef EXATN_NUMERICS_TENSOR_BASIC_HPP_
#define EXATN_NUMERICS_TENSOR_BASIC_HPP_
......@@ -63,22 +63,22 @@ enum class IndexKind{
//Basic tensor operations:
enum class TensorOpCode{
NOOP, //no operation
CREATE, //tensor creation
DESTROY, //tensor destruction
TRANSFORM, //tensor transformation/initialization
SLICE, //tensor slicing
INSERT, //tensor insertion
ADD, //tensor addition
CONTRACT, //tensor contraction
DECOMPOSE_SVD3, //tensor decomposition via SVD into three tensor factors
DECOMPOSE_SVD2, //tensor decomposition via SVD into two tensor factors
ORTHOGONALIZE_SVD, //tensor orthogonalization via SVD
ORTHOGONALIZE_MGS, //tensor orthogonalization via Modified Gram-Schmidt
FETCH, //fetch tensor data from another MPI process (parallel execution only)
UPLOAD, //upload tensor data to another MPI process (parallel execution only)
BROADCAST, //tensor broadcast (parallel execution only)
ALLREDUCE //tensor allreduce (parallel execution only)
NOOP, //0: no operation
CREATE, //1: tensor creation
DESTROY, //2: tensor destruction
TRANSFORM, //3: tensor transformation/initialization
SLICE, //4: tensor slicing
INSERT, //5: tensor insertion
ADD, //6: tensor addition
CONTRACT, //7: tensor contraction
DECOMPOSE_SVD3, //8: tensor decomposition via SVD into three tensor factors
DECOMPOSE_SVD2, //9: tensor decomposition via SVD into two tensor factors
ORTHOGONALIZE_SVD, //10: tensor orthogonalization via SVD
ORTHOGONALIZE_MGS, //11: tensor orthogonalization via Modified Gram-Schmidt
FETCH, //12: fetch tensor data from another MPI process (parallel execution only)
UPLOAD, //13: upload tensor data to another MPI process (parallel execution only)
BROADCAST, //14: tensor broadcast (parallel execution only)
ALLREDUCE //15: tensor allreduce (parallel execution only)
};
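Reviewer note: the renumbered comments record each enumerator's ordinal. A self-contained check of that numbering (the enum is copied verbatim from this hunk; since no explicit values are assigned, static_cast yields the ordinal):

```cpp
#include <iostream>

enum class TensorOpCode { NOOP, CREATE, DESTROY, TRANSFORM, SLICE, INSERT, ADD, CONTRACT,
                          DECOMPOSE_SVD3, DECOMPOSE_SVD2, ORTHOGONALIZE_SVD, ORTHOGONALIZE_MGS,
                          FETCH, UPLOAD, BROADCAST, ALLREDUCE };

int main()
{
 std::cout << static_cast<int>(TensorOpCode::CONTRACT)  << "\n";  // prints 7
 std::cout << static_cast<int>(TensorOpCode::ALLREDUCE) << "\n";  // prints 15
 return 0;
}
```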
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -114,7 +114,7 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
unsigned int pipeline_depth,
unsigned int num_processes, unsigned int process_rank):
tensor_data_access_func_(std::move(tensor_data_access_func)),
pipe_depth_(pipeline_depth), num_processes_(num_processes), process_rank_(process_rank)
pipe_depth_(pipeline_depth), num_processes_(num_processes), process_rank_(process_rank), flops_(0.0)
{
static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)");
......@@ -442,6 +442,7 @@ void CuQuantumExecutor::planExecution(std::shared_ptr<TensorNetworkReq> tn_req)
tn_req->opt_info,
CUTENSORNET_CONTRACTION_OPTIMIZER_INFO_FLOP_COUNT,
&flops,sizeof(flops)));
flops_ += flops;
}
tn_req->exec_status = TensorNetworkQueue::ExecStat::Planning;
return;
......
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -68,6 +68,9 @@ public:
/** Synchronizes execution of all submitted tensor networks to completion. **/
void sync();
/** Returns total executed flop count. **/
double getTotalFlopCount() const {return flops_;}
protected:
static constexpr float WORKSPACE_FRACTION = 0.6;
......@@ -106,6 +109,8 @@ protected:
const unsigned int num_processes_;
/** Current process rank **/
const unsigned int process_rank_;
/** Executed flops **/
double flops_;
};
} //namespace runtime
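Reviewer note: the new flops_ counter accumulated in planExecution() is what getTotalFlopCount() returns, and it ultimately feeds the Gflop/s figure printed by the test above. A sketch of that measurement pattern at the client level, using only calls that appear elsewhere in this commit (the network label and contraction expression are placeholders; exatn.hpp is the assumed client header):

```cpp
#include "exatn.hpp"  // assumed client header
#include <cassert>
#include <iostream>

void timeContraction()
{
 auto flops = exatn::getTotalFlopCount();            // flop count before execution
 auto t0 = exatn::Timer::timeInSecHR();
 bool ok = exatn::evaluateTensorNetwork("net",
           "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)");
 assert(ok);
 ok = exatn::sync("D", true); assert(ok);
 auto duration = exatn::Timer::timeInSecHR(t0);
 flops = exatn::getTotalFlopCount() - flops;         // flops executed by this contraction
 std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s\n";
}
```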
......
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -18,7 +18,9 @@ Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
#include "errors.hpp"
//#define DEBUG
#ifndef NDEBUG
#define DEBUG
#endif
namespace exatn {
namespace runtime {
......@@ -37,7 +39,7 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no
void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size);
return data_ptr;
},
CUQUANTUM_PIPELINE_DEPTH,
cuquantum_pipe_depth_,
num_processes,
process_rank
);
......@@ -280,8 +282,8 @@ void LazyGraphExecutor::execute(TensorGraph & dag) {
void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
#ifdef CUQUANTUM
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum: "
<< tensor_network_queue.getSize() << " networks detected" << std::endl;
//std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Started executing the tensor network queue via cuQuantum: "
// << tensor_network_queue.getSize() << " networks detected" << std::endl;
assert(node_executor_);
//Synchronize the node executor:
bool synced = node_executor_->sync(); assert(synced);
......@@ -292,7 +294,7 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
bool not_over = !tensor_network_queue.isOver();
while(not_over){
const auto current_pos = tensor_network_queue.getCurrentPos();
if(current_pos < CUQUANTUM_PIPELINE_DEPTH){
if(current_pos < cuquantum_pipe_depth_){
const auto current = tensor_network_queue.getCurrent();
const auto exec_handle = current->second;
int error_code = 0;
......@@ -302,18 +304,40 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
assert(error_code == 0);
}
if(exec_stat == TensorNetworkQueue::ExecStat::None){
if(logging_.load() != 0){
logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "](LazyGraphExecutor)[EXEC_THREAD]: Submitting to cuQuantum tensor network "
<< exec_handle << ": Status = ";
#ifdef DEBUG
logfile_.flush();
#endif
}
const auto exec_conf = tensor_network_queue.getExecConfiguration(exec_handle);
exec_stat = cuquantum_executor_->execute(current->first,exec_conf.first,exec_conf.second,exec_handle);
if(logging_.load() != 0){
logfile_ << static_cast<int>(exec_stat) << std::endl;
#ifdef DEBUG
logfile_.flush();
#endif
}
if(exec_stat != TensorNetworkQueue::ExecStat::None){
auto prev_exec_stat = tensor_network_queue.updateExecStatus(exec_handle,exec_stat);
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Submitted tensor network to cuQuantum\n";
//std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Submitted tensor network to cuQuantum\n";
}
not_over = tensor_network_queue.next();
}else if(exec_stat == TensorNetworkQueue::ExecStat::Completed){
if(logging_.load() != 0){
logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
<< "](LazyGraphExecutor)[EXEC_THREAD]: Completed via cuQuantum tensor network " << exec_handle << std::endl;
#ifdef DEBUG
logfile_.flush();
#endif
}
auto prev_exec_stat = tensor_network_queue.updateExecStatus(exec_handle,exec_stat);
assert(current_pos == 0);
tensor_network_queue.remove();
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Completed tensor network execution via cuQuantum\n";
//std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Completed tensor network execution via cuQuantum\n";
not_over = !tensor_network_queue.isOver();
}else{
auto prev_exec_stat = tensor_network_queue.updateExecStatus(exec_handle,exec_stat);
......@@ -325,12 +349,23 @@ void LazyGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
}
}
cuquantum_executor_->sync();
std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n";
//std::cout << "#DEBUG(exatn::runtime::LazyGraphExecutor::execute): Finished executing the tensor network queue via cuQuantum\n";
#else
assert(tensor_network_queue.isEmpty());
#endif
return;
}
double LazyGraphExecutor::getTotalFlopCount() const
{
while(!node_executor_);
double flops = node_executor_->getTotalFlopCount();
#ifdef CUQUANTUM
while(!cuquantum_executor_);
flops += cuquantum_executor_->getTotalFlopCount();
#endif
return flops;
}
} //namespace runtime
} //namespace exatn
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2022/01/06
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -32,6 +32,9 @@ public:
LazyGraphExecutor(): pipeline_depth_(DEFAULT_PIPELINE_DEPTH),
prefetch_depth_(DEFAULT_PREFETCH_DEPTH)
#ifdef CUQUANTUM
,cuquantum_pipe_depth_(CUQUANTUM_PIPELINE_DEPTH)
#endif
{
}
......@@ -71,6 +74,9 @@ public:
return pipeline_depth_;
}
/** Returns the current value of the total Flop count executed by the node executor. **/
virtual double getTotalFlopCount() const override;
const std::string name() const override {return "lazy-dag-executor";}
const std::string description() const override {return "Lazy tensor graph executor";}
std::shared_ptr<TensorGraphExecutor> clone() override {return std::make_shared<LazyGraphExecutor>();}
......@@ -80,6 +86,7 @@ protected:
unsigned int pipeline_depth_; //max number of active tensor operations in flight
unsigned int prefetch_depth_; //max number of tensor operations with active prefetch in flight
#ifdef CUQUANTUM
unsigned int cuquantum_pipe_depth_; //max number of actively executed tensor networks via cuQuantum
std::shared_ptr<CuQuantumExecutor> cuquantum_executor_; //cuQuantum executor
#endif
};
......
/** ExaTN:: Tensor Runtime: Tensor graph executor
REVISION: 2022/01/06
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -124,7 +124,7 @@ public:
}
/** Returns the current value of the total Flop count executed by the node executor. **/
double getTotalFlopCount() const {
virtual double getTotalFlopCount() const {
while(!node_executor_);
return node_executor_->getTotalFlopCount();
}
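Reviewer note: the base-class getTotalFlopCount() is made virtual in this hunk so that the LazyGraphExecutor override (which adds the cuQuantum flops) is reached through a TensorGraphExecutor pointer. A minimal sketch of that dispatch with placeholder classes (not ExaTN code):

```cpp
#include <iostream>
#include <memory>

struct GraphExecutor {                                              // stands in for TensorGraphExecutor
 virtual ~GraphExecutor() = default;
 virtual double getTotalFlopCount() const {return 100.0;}          // node-executor flops only
};

struct LazyExecutor : GraphExecutor {                               // stands in for LazyGraphExecutor
 double getTotalFlopCount() const override {return 100.0 + 50.0;}  // adds cuQuantum flops
};

int main()
{
 std::unique_ptr<GraphExecutor> exec = std::make_unique<LazyExecutor>();
 std::cout << exec->getTotalFlopCount() << "\n";   // prints 150: the override is reached
 return 0;
}
```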
......
/** ExaTN:: Tensor Runtime: Task-based execution layer for tensor operations
REVISION: 2022/01/07
REVISION: 2022/01/08
Copyright (C) 2018-2022 Dmitry Lyakh, Tiffany Mintz, Alex McCaskey
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
......@@ -296,13 +296,16 @@ TensorOpExecHandle TensorRuntime::submit(std::shared_ptr<numerics::TensorNetwork
const MPICommProxy & communicator,
unsigned int num_processes, unsigned int process_rank)
{
return tensor_network_queue_.append(network,communicator,num_processes,process_rank);
const auto exec_handle = tensor_network_queue_.append(network,communicator,num_processes,process_rank);
executing_.store(true); //signal to the execution thread to execute the queue
return exec_handle;
}
bool TensorRuntime::syncNetwork(const TensorOpExecHandle exec_handle, bool wait)
{
assert(exec_handle != 0);
executing_.store(true); //reactivate the execution thread in case it was not active
bool synced = false;
while(!synced){
const auto exec_stat = tensor_network_queue_.checkExecStatus(exec_handle);
......
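Reviewer note: submit() now sets executing_ so the execution thread picks the queue up immediately, and syncNetwork() re-activates the thread before polling the execution status. A sketch of the call sequence NumServer follows in this commit; the runtime, network and communicator types are taken as template parameters so no header paths or namespaces are assumed:

```cpp
#include <cassert>

template <typename Runtime, typename NetworkPtr, typename Comm>
void runAndWait(Runtime & tensor_rt, NetworkPtr network, const Comm & comm,
                unsigned int num_procs, unsigned int my_rank)
{
 const auto exec_handle = tensor_rt.submit(network, comm, num_procs, my_rank);
 assert(exec_handle != 0);                                               // a zero handle signals a failed submission
 const bool synced = tensor_rt.syncNetwork(exec_handle, /*wait=*/true);  // block until completed
 assert(synced);
}
```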