Commit 0256df3a authored by Dmitry I. Lyakh

Fixed Host buffer leak problem


Signed-off-by: Dmitry I. Lyakh <quant4me@gmail.com>
parent 7dc4b071
Pipeline #189059 failed with stage in 5 minutes and 41 seconds
#!/bin/bash
#Monitors the total Flop count and current memory usage reported in the executor log
#(refreshes every second and prints the most recent "usage" line):
watch --interval 1 "tail --lines=1024 ./exatn_exec_thread.0.log | grep usage | tail --lines=1"
/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/25
REVISION: 2022/01/26
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -518,6 +518,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to CREATE an already existing tensor "
<< tensor->getName() << std::endl << std::flush;
submitted = false;
//}else{ //debug
//const auto & tens_name = tensor->getName();
//std::cout << "#DEBUG(exatn::NumServer::submitOp): Created tensor " << tens_name << std::endl << std::flush;
}
}else if(operation->getOpcode() == TensorOpCode::DESTROY){
auto tensor = operation->getTensorOperand(0);
@@ -526,6 +529,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to DESTROY a non-existing tensor "
<< tensor->getName() << std::endl << std::flush;
submitted = false;
//}else{ //debug
//const auto & tens_name = tensor->getName();
//std::cout << "#DEBUG(exatn::NumServer::submitOp): Destroyed tensor " << tens_name << std::endl << std::flush;
}
}
//Submit tensor operation to tensor runtime:
@@ -1632,7 +1638,8 @@ bool NumServer::destroyTensors()
{
bool success = true;
while(!tensors_.empty()){
success = destroyTensor(tensors_.begin()->first);
const auto tens_name = tensors_.begin()->first;
success = destroyTensor(tens_name);
if(!success) break;
}
return success;
@@ -1642,7 +1649,8 @@ bool NumServer::destroyTensorsSync()
{
bool success = true;
while(!tensors_.empty()){
success = destroyTensorSync(tensors_.begin()->first);
const auto tens_name = tensors_.begin()->first;
success = destroyTensorSync(tens_name);
if(!success) break;
}
return success;
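The change above takes a copy of the tensor name before passing it to destroyTensor()/destroyTensorSync(). A minimal standalone sketch of the hazard such a copy avoids, assuming the destroy call erases the registry entry whose key was passed in by reference (the std::map stand-in and the destroyTensor() body below are illustrative, not the actual NumServer code):

#include <iostream>
#include <map>
#include <memory>
#include <string>

//Illustrative stand-in for the tensor registry (assumed std::map-based; the real
//NumServer registry type is not shown in this diff):
std::map<std::string, std::shared_ptr<int>> tensors_;

//Mimics a destroy operation: erases the registry entry, then keeps using the name.
bool destroyTensor(const std::string & name)
{
 const bool erased = (tensors_.erase(name) == 1);
 //If `name` is a reference into the erased map node, this line reads freed memory:
 std::cout << "Destroyed tensor " << name << ": " << erased << std::endl;
 return erased;
}

int main()
{
 tensors_.emplace("A",std::make_shared<int>(1));
 tensors_.emplace("B",std::make_shared<int>(2));
 //Hazardous form (what the old code did): binds a reference to the key of the
 //very node that destroyTensor() erases, leaving it dangling inside the call:
 //destroyTensor(tensors_.begin()->first);
 //Safe form (what the commit switches to): copy the key before destroying:
 bool success = true;
 while(!tensors_.empty()){
  const auto tens_name = tensors_.begin()->first; //copy outlives the erase
  success = destroyTensor(tens_name);
  if(!success) break;
 }
 std::cout << "Remaining tensors: " << tensors_.size() << std::endl;
 return success ? 0 : 1;
}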
......
@@ -21,7 +21,7 @@
#define EXATN_TEST0
#define EXATN_TEST1
/*#define EXATN_TEST2
#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5
@@ -48,11 +48,12 @@
#define EXATN_TEST26
#define EXATN_TEST27 //requires input file from source
#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29*/
#define EXATN_TEST29
#define EXATN_TEST30
#define EXATN_TEST31 //requires input file from source
#define EXATN_TEST32
#define EXATN_TEST33
#define EXATN_TEST34
#ifdef EXATN_TEST0
@@ -4127,6 +4128,55 @@ TEST(NumServerTester, CuTensorNet) {
#endif
#ifdef EXATN_TEST33
TEST(NumServerTester, IsometricAIEM) {
using exatn::TensorShape;
using exatn::TensorSignature;
using exatn::Tensor;
using exatn::TensorNetwork;
using exatn::TensorExpansion;
using exatn::TensorOperator;
using exatn::TensorElementType;
using exatn::TensorRange;
const auto TENS_ELEM_TYPE = TensorElementType::COMPLEX64;
//exatn::resetLoggingLevel(1,2); //debug
std::size_t free_mem = 0;
auto used_mem = exatn::getMemoryUsage(&free_mem);
std::cout << "#MSG(exatn): Backend tensor memory usage on entrance = "
<< used_mem << std::endl << std::flush;
assert(used_mem == 0);
bool success = true;
//Create tensors:
success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{}); assert(success);
success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);
success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);
//Init tensors:
success = exatn::initTensorRnd("A"); assert(success);
success = exatn::initTensorRnd("B"); assert(success);
success = exatn::initTensor("C",0.0); assert(success);
//Contract tensors:
success = exatn::contractTensors("C(u1,u0)+=A()*B(u0,u1)",1.0); assert(success);
//Destroy tensors:
success = exatn::sync(); assert(success);
success = exatn::destroyTensor("C"); assert(success);
success = exatn::destroyTensor("B"); assert(success);
success = exatn::destroyTensor("A"); assert(success);
//Synchronize:
success = exatn::syncClean(); assert(success);
//exatn::resetLoggingLevel(0,0);
//Grab a beer!
}
#endif
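In keeping with the Host-buffer-leak theme of this commit, the memory-usage probe taken at the start of IsometricAIEM could also be repeated once all tensors have been destroyed. A hedged sketch of such a closing check (it would go right before the test's closing brace; it is not part of the committed test):

 //Hypothetical closing check (not in the committed test): after syncClean(),
 //backend tensor memory usage should return to zero if no Host buffer leaked:
 used_mem = exatn::getMemoryUsage(&free_mem);
 std::cout << "#MSG(exatn): Backend tensor memory usage on exit = "
           << used_mem << std::endl << std::flush;
 assert(used_mem == 0);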
#ifdef EXATN_TEST34
TEST(NumServerTester, TensorComposite) {
using exatn::TensorShape;
using exatn::TensorSignature;
......
@@ -673,6 +673,7 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
}
//std::cout << "#DEBUG(exatn::runtime::node_executor_talsh): Tensor contraction " << op.getIndexPattern() << std::endl; //debug
//const auto host_buf_free_mem = talshDeviceBufferFreeSize(0,DEV_HOST); //debug
auto error_code = tens0.contractAccumulate((task_res.first)->second.get(),
op.getIndexPatternReduced(),
tens1,tens2,
@@ -720,6 +721,11 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
double flop_count = talsh_submitted_flops_.load() + op.getFlopEstimate() * tensorElementTypeOpFactor(tensor1.getElementType());
talsh_submitted_flops_.store(flop_count);
}
/*if(talshDeviceBufferFreeSize(0,DEV_HOST) < host_buf_free_mem){ //debug
std::cout << "#FATAL(exatn::runtime::TalshNodeExecutor): Host buffer leak detected for tensor contraction:\n";
op.printIt();
std::abort();
}*/
return error_code;
}
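The commented-out fragments above show the instrumentation used to hunt the leak: the free size of the TAL-SH Host buffer is sampled before the contraction and compared once the operation has finished. A standalone sketch of that bracketing pattern, assuming a synchronous operation; the executeChecked() helper and the talsh.h include are illustrative assumptions, while the talshDeviceBufferFreeSize(0,DEV_HOST) call is taken verbatim from the fragment above:

//Hypothetical helper sketching the debug pattern above: sample the Host buffer free
//size before an operation and verify that no allocation survives once it completes.
#include "talsh.h" //assumed TAL-SH header providing talshDeviceBufferFreeSize() and DEV_HOST

#include <cstdlib>
#include <iostream>

template <typename Operation>
int executeChecked(Operation && op)
{
 const auto host_buf_free_mem = talshDeviceBufferFreeSize(0,DEV_HOST); //Host buffer bytes free before
 const int error_code = op(); //run the operation (it must have completed before the check below)
 if(talshDeviceBufferFreeSize(0,DEV_HOST) < host_buf_free_mem){ //less free space afterwards => leak
  std::cout << "#FATAL: Host buffer leak detected" << std::endl << std::flush;
  std::abort();
 }
 return error_code;
}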
......
/** ExaTN: Error handling
REVISION: 2021/09/27
REVISION: 2022/01/26
Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
#ifndef EXATN_ERRORS_HPP_
#define EXATN_ERRORS_HPP_
#ifndef NO_LINUX
#include <execinfo.h> //Linux only
#include <stdio.h>
#include <stdlib.h> //needed for free() used by print_backtrace() below
#endif
#include <iostream>
#include <string>
@@ -57,6 +62,25 @@ inline void print_variadic_pack(Arg&& arg, Args&&... args)
return print_variadic_pack(std::forward<Args>(args)...);
}
#ifndef NO_LINUX
inline void print_backtrace() //Linux only
{
constexpr int MAX_CALLSTACK_DEPTH = 256; //compile-time bound keeps the stack-frame array standard C++
int callstack_depth = 0;
void * addresses[MAX_CALLSTACK_DEPTH];
char ** funcs;
callstack_depth = backtrace(addresses,MAX_CALLSTACK_DEPTH);
funcs = backtrace_symbols(addresses,callstack_depth);
if(funcs != nullptr){
for(int i = 0; i < callstack_depth; ++i){
printf("%s\n",funcs[i]);
}
free(funcs);
}
return;
}
#endif
} //namespace exatn
#endif //EXATN_ERRORS_HPP_
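A minimal usage sketch for the new print_backtrace() helper; the include path and the fatal() wrapper below are assumptions for illustration, not part of the commit:

//Hypothetical caller: dump the call stack right before aborting on a fatal error.
#include "errors.hpp" //assumed include path for the header above

#include <cstdlib>
#include <iostream>
#include <string>

void fatal(const std::string & msg)
{
 std::cerr << "#FATAL: " << msg << std::endl << std::flush;
#ifndef NO_LINUX
 exatn::print_backtrace(); //prints one backtrace_symbols() frame per line
#endif
 std::abort();
}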
Subproject commit 08bfaaabe281a9ec97d76d068dc745ea0e4b481b
Subproject commit c34feecf09eec379ca82863309c5e53a1fd9745d