Commit 0256df3a authored by Dmitry I. Lyakh

Fixed Host buffer leak problem


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 7dc4b071
Pipeline #189059 failed with stage in 5 minutes and 41 seconds
#!/bin/bash
#Monitors the total Flop count and current memory usage reported in the executor log
#(refreshes every second and prints the most recent "usage" line):
watch --interval 1 "tail --lines=1024 ./exatn_exec_thread.0.log | grep usage | tail --lines=1"
/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/25
REVISION: 2022/01/26
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -518,6 +518,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to CREATE an already existing tensor "
<< tensor->getName() << std::endl << std::flush;
submitted = false;
//}else{ //debug
//const auto & tens_name = tensor->getName();
//std::cout << "#DEBUG(exatn::NumServer::submitOp): Created tensor " << tens_name << std::endl << std::flush;
}
}else if(operation->getOpcode() == TensorOpCode::DESTROY){
auto tensor = operation->getTensorOperand(0);
@@ -526,6 +529,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to DESTROY a non-existing tensor "
<< tensor->getName() << std::endl << std::flush;
submitted = false;
//}else{ //debug
//const auto & tens_name = tensor->getName();
//std::cout << "#DEBUG(exatn::NumServer::submitOp): Destroyed tensor " << tens_name << std::endl << std::flush;
}
}
//Submit tensor operation to tensor runtime:
@@ -1632,7 +1638,8 @@ bool NumServer::destroyTensors()
{
bool success = true;
while(!tensors_.empty()){
success = destroyTensor(tensors_.begin()->first);
const auto tens_name = tensors_.begin()->first;
success = destroyTensor(tens_name);
if(!success) break;
}
return success;
@@ -1642,7 +1649,8 @@ bool NumServer::destroyTensorsSync()
{
bool success = true;
while(!tensors_.empty()){
success = destroyTensorSync(tensors_.begin()->first);
const auto tens_name = tensors_.begin()->first;
success = destroyTensorSync(tens_name);
if(!success) break;
}
return success;
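The change above takes a copy of the tensor name before passing it to destroyTensor()/destroyTensorSync(). A minimal standalone sketch of the hazard such a copy avoids, assuming the destroy call erases the registry entry whose key was passed in by reference (the std::map stand-in and the destroyTensor() body below are illustrative, not the actual NumServer code):

#include <iostream>
#include <map>
#include <memory>
#include <string>

//Illustrative stand-in for the tensor registry (assumed std::map-based; the real
//NumServer registry type is not shown in this diff):
std::map<std::string, std::shared_ptr<int>> tensors_;

//Mimics a destroy operation: erases the registry entry, then keeps using the name.
bool destroyTensor(const std::string & name)
{
 const bool erased = (tensors_.erase(name) == 1);
 //If `name` is a reference into the erased map node, this line reads freed memory:
 std::cout << "Destroyed tensor " << name << ": " << erased << std::endl;
 return erased;
}

int main()
{
 tensors_.emplace("A",std::make_shared<int>(1));
 tensors_.emplace("B",std::make_shared<int>(2));
 //Hazardous form (what the old code did): binds a reference to the key of the
 //very node that destroyTensor() erases, leaving it dangling inside the call:
 //destroyTensor(tensors_.begin()->first);
 //Safe form (what the commit switches to): copy the key before destroying:
 bool success = true;
 while(!tensors_.empty()){
  const auto tens_name = tensors_.begin()->first; //copy outlives the erase
  success = destroyTensor(tens_name);
  if(!success) break;
 }
 std::cout << "Remaining tensors: " << tensors_.size() << std::endl;
 return success ? 0 : 1;
}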
......
@@ -21,7 +21,7 @@
#define EXATN_TEST0
#define EXATN_TEST1
/*#define EXATN_TEST2
#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5
@@ -48,11 +48,12 @@
#define EXATN_TEST26
#define EXATN_TEST27 //requires input file from source
#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29*/
#define EXATN_TEST29
#define EXATN_TEST30
#define EXATN_TEST31 //requires input file from source
#define EXATN_TEST32
#define EXATN_TEST33
#define EXATN_TEST34
#ifdef EXATN_TEST0
@@ -4127,6 +4128,55 @@ TEST(NumServerTester, CuTensorNet) {
#endif
#ifdef EXATN_TEST33
TEST(NumServerTester, IsometricAIEM) {
using exatn::TensorShape;
using exatn::TensorSignature;
using exatn::Tensor;
using exatn::TensorNetwork;
using exatn::TensorExpansion;
using exatn::TensorOperator;
using exatn::TensorElementType;
using exatn::TensorRange;
const auto TENS_ELEM_TYPE = TensorElementType::COMPLEX64;
//exatn::resetLoggingLevel(1,2); //debug
std::size_t free_mem = 0;
auto used_mem = exatn::getMemoryUsage(&free_mem);
std::cout << "#MSG(exatn): Backend tensor memory usage on entrance = "
<< used_mem << std::endl << std::flush;
assert(used_mem == 0);
bool success = true;
//Create tensors:
success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{}); assert(success);
success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);
success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);
//Init tensors:
success = exatn::initTensorRnd("A"); assert(success);
success = exatn::initTensorRnd("B"); assert(success);
success = exatn::initTensor("C",0.0); assert(success);
//Contract tensors:
success = exatn::contractTensors("C(u1,u0)+=A()*B(u0,u1)",1.0); assert(success);
//Destroy tensors:
success = exatn::sync(); assert(success);
success = exatn::destroyTensor("C"); assert(success);
success = exatn::destroyTensor("B"); assert(success);
success = exatn::destroyTensor("A"); assert(success);
//Synchronize:
success = exatn::syncClean(); assert(success);
//exatn::resetLoggingLevel(0,0);
//Grab a beer!
}
#endif
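In keeping with the Host-buffer-leak theme of this commit, the memory-usage probe taken at the start of IsometricAIEM could also be repeated once all tensors have been destroyed. A hedged sketch of such a closing check (it would go right before the test's closing brace; it is not part of the committed test):

 //Hypothetical closing check (not in the committed test): after syncClean(),
 //backend tensor memory usage should return to zero if no Host buffer leaked:
 used_mem = exatn::getMemoryUsage(&free_mem);
 std::cout << "#MSG(exatn): Backend tensor memory usage on exit = "
           << used_mem << std::endl << std::flush;
 assert(used_mem == 0);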
#ifdef EXATN_TEST34
TEST(NumServerTester, TensorComposite) {
using exatn::TensorShape;
using exatn::TensorSignature;
......
@@ -673,6 +673,7 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
}
//std::cout << "#DEBUG(exatn::runtime::node_executor_talsh): Tensor contraction " << op.getIndexPattern() << std::endl; //debug
//const auto host_buf_free_mem = talshDeviceBufferFreeSize(0,DEV_HOST); //debug
auto error_code = tens0.contractAccumulate((task_res.first)->second.get(),
op.getIndexPatternReduced(),
tens1,tens2,
@@ -720,6 +721,11 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
double flop_count = talsh_submitted_flops_.load() + op.getFlopEstimate() * tensorElementTypeOpFactor(tensor1.getElementType());
talsh_submitted_flops_.store(flop_count);
}
/*if(talshDeviceBufferFreeSize(0,DEV_HOST) < host_buf_free_mem){ //debug
std::cout << "#FATAL(exatn::runtime::TalshNodeExecutor): Host buffer leak detected for tensor contraction:\n";
op.printIt();
std::abort();
}*/
return error_code;
}
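The commented-out fragments above show the instrumentation used to hunt the leak: the free size of the TAL-SH Host buffer is sampled before the contraction and compared once the operation has finished. A standalone sketch of that bracketing pattern, assuming a synchronous operation; the executeChecked() helper and the talsh.h include are illustrative assumptions, while the talshDeviceBufferFreeSize(0,DEV_HOST) call is taken verbatim from the fragment above:

//Hypothetical helper sketching the debug pattern above: sample the Host buffer free
//size before an operation and verify that no allocation survives once it completes.
#include "talsh.h" //assumed TAL-SH header providing talshDeviceBufferFreeSize() and DEV_HOST

#include <cstdlib>
#include <iostream>

template <typename Operation>
int executeChecked(Operation && op)
{
 const auto host_buf_free_mem = talshDeviceBufferFreeSize(0,DEV_HOST); //Host buffer bytes free before
 const int error_code = op(); //run the operation (it must have completed before the check below)
 if(talshDeviceBufferFreeSize(0,DEV_HOST) < host_buf_free_mem){ //less free space afterwards => leak
  std::cout << "#FATAL: Host buffer leak detected" << std::endl << std::flush;
  std::abort();
 }
 return error_code;
}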
......
/** ExaTN: Error handling
REVISION: 2021/09/27
REVISION: 2022/01/26
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
#ifndef EXATN_ERRORS_HPP_
#define EXATN_ERRORS_HPP_
#ifndef NO_LINUX
#include <execinfo.h> //Linux only
#include <stdio.h>
#include <stdlib.h> //needed for free() used by print_backtrace() below
#endif
#include <iostream>
#include <string>
@@ -57,6 +62,25 @@ inline void print_variadic_pack(Arg&& arg, Args&&... args)
return print_variadic_pack(std::forward<Args>(args)...);
}
#ifndef NO_LINUX
inline void print_backtrace() //Linux only
{
constexpr int MAX_CALLSTACK_DEPTH = 256; //compile-time bound keeps the stack-frame array standard C++
int callstack_depth = 0;
void * addresses[MAX_CALLSTACK_DEPTH];
char ** funcs;
callstack_depth = backtrace(addresses,MAX_CALLSTACK_DEPTH);
funcs = backtrace_symbols(addresses,callstack_depth);
if(funcs != nullptr){
for(int i = 0; i < callstack_depth; ++i){
printf("%s\n",funcs[i]);
}
free(funcs);
}
return;
}
#endif
} //namespace exatn
#endif //EXATN_ERRORS_HPP_
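A minimal usage sketch for the new print_backtrace() helper; the include path and the fatal() wrapper below are assumptions for illustration, not part of the commit:

//Hypothetical caller: dump the call stack right before aborting on a fatal error.
#include "errors.hpp" //assumed include path for the header above

#include <cstdlib>
#include <iostream>
#include <string>

void fatal(const std::string & msg)
{
 std::cerr << "#FATAL: " << msg << std::endl << std::flush;
#ifndef NO_LINUX
 exatn::print_backtrace(); //prints one backtrace_symbols() frame per line
#endif
 std::abort();
}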
Subproject commit 08bfaaabe281a9ec97d76d068dc745ea0e4b481b
Subproject commit c34feecf09eec379ca82863309c5e53a1fd9745d