Commit 0256df3a authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Fixed Host buffer leak problem

parent 7dc4b071
Loading
Loading
Loading
Loading
Loading

monitor.sh

0 → 100755
+3 −0
Original line number Diff line number Diff line
#!/bin/bash
#Will monitor the total Flop count and current memory usage:
#Refreshes every second: takes the last 1024 lines of the ExaTN execution thread
#log, keeps only lines containing "usage", and displays the most recent one.
watch --interval 1 "tail --lines=1024 ./exatn_exec_thread.0.log | grep usage | tail --lines=1"
+11 −3
Original line number Diff line number Diff line
/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/25
REVISION: 2022/01/26

Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -518,6 +518,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
    std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to CREATE an already existing tensor "
              << tensor->getName() << std::endl << std::flush;
    submitted = false;
   //}else{ //debug
    //const auto & tens_name = tensor->getName();
    //std::cout << "#DEBUG(exatn::NumServer::submitOp): Created tensor " << tens_name << std::endl << std::flush;
   }
  }else if(operation->getOpcode() == TensorOpCode::DESTROY){
   auto tensor = operation->getTensorOperand(0);
@@ -526,6 +529,9 @@ bool NumServer::submitOp(std::shared_ptr<TensorOperation> operation)
    std::cout << "#ERROR(exatn::NumServer::submitOp): Attempt to DESTROY a non-existing tensor "
              << tensor->getName() << std::endl << std::flush;
    submitted = false;
   //}else{ //debug
    //const auto & tens_name = tensor->getName();
    //std::cout << "#DEBUG(exatn::NumServer::submitOp): Destroyed tensor " << tens_name << std::endl << std::flush;
   }
  }
  //Submit tensor operation to tensor runtime:
@@ -1632,7 +1638,8 @@ bool NumServer::destroyTensors()
{
 bool success = true;
 while(!tensors_.empty()){
  success = destroyTensor(tensors_.begin()->first);
  const auto tens_name = tensors_.begin()->first;
  success = destroyTensor(tens_name);
  if(!success) break;
 }
 return success;
@@ -1642,7 +1649,8 @@ bool NumServer::destroyTensorsSync()
{
 bool success = true;
 while(!tensors_.empty()){
  success = destroyTensorSync(tensors_.begin()->first);
  const auto tens_name = tensors_.begin()->first;
  success = destroyTensorSync(tens_name);
  if(!success) break;
 }
 return success;
+52 −2
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@

#define EXATN_TEST0
#define EXATN_TEST1
/*#define EXATN_TEST2
#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5
@@ -48,11 +48,12 @@
#define EXATN_TEST26
#define EXATN_TEST27 //requires input file from source
#define EXATN_TEST28 //requires input file from source
#define EXATN_TEST29*/
#define EXATN_TEST29
#define EXATN_TEST30
#define EXATN_TEST31 //requires input file from source
#define EXATN_TEST32
#define EXATN_TEST33
#define EXATN_TEST34


#ifdef EXATN_TEST0
@@ -4127,6 +4128,55 @@ TEST(NumServerTester, CuTensorNet) {
#endif

#ifdef EXATN_TEST33
//NOTE(review): this test contracts a rank-0 (scalar) tensor A() with a 4x4 matrix
//B into C; it was enabled by the commit titled "Fixed Host buffer leak problem",
//so it presumably exercises the scalar-contraction path that leaked — confirm.
TEST(NumServerTester, IsometricAIEM) {
 using exatn::TensorShape;
 using exatn::TensorSignature;
 using exatn::Tensor;
 using exatn::TensorNetwork;
 using exatn::TensorExpansion;
 using exatn::TensorOperator;
 using exatn::TensorElementType;
 using exatn::TensorRange;

 const auto TENS_ELEM_TYPE = TensorElementType::COMPLEX64;

 //exatn::resetLoggingLevel(1,2); //debug

 //Verify no backend tensor memory is in use on entrance (previous tests
 //must have released all of their buffers):
 std::size_t free_mem = 0;
 auto used_mem = exatn::getMemoryUsage(&free_mem);
 std::cout << "#MSG(exatn): Backend tensor memory usage on entrance = "
           << used_mem << std::endl << std::flush;
 assert(used_mem == 0);

 bool success = true;

 //Create tensors: A is a scalar (rank-0), B and C are 4x4 matrices:
 success = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{}); assert(success);
 success = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);
 success = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{4,4}); assert(success);

 //Init tensors: A and B random, C zeroed out (accumulator):
 success = exatn::initTensorRnd("A"); assert(success);
 success = exatn::initTensorRnd("B"); assert(success);
 success = exatn::initTensor("C",0.0); assert(success);

 //Contract tensors: scalar A() scales transposed B into C:
 success = exatn::contractTensors("C(u1,u0)+=A()*B(u0,u1)",1.0); assert(success);

 //Destroy tensors (sync first so all pending work on them has finished):
 success = exatn::sync(); assert(success);
 success = exatn::destroyTensor("C"); assert(success);
 success = exatn::destroyTensor("B"); assert(success);
 success = exatn::destroyTensor("A"); assert(success);

 //Synchronize:
 success = exatn::syncClean(); assert(success);
 //exatn::resetLoggingLevel(0,0);
 //Grab a beer!
}
#endif

#ifdef EXATN_TEST34
TEST(NumServerTester, TensorComposite) {
 using exatn::TensorShape;
 using exatn::TensorSignature;
+6 −0
Original line number Diff line number Diff line
@@ -673,6 +673,7 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
 }

 //std::cout << "#DEBUG(exatn::runtime::node_executor_talsh): Tensor contraction " << op.getIndexPattern() << std::endl; //debug
 //const auto host_buf_free_mem = talshDeviceBufferFreeSize(0,DEV_HOST); //debug
 auto error_code = tens0.contractAccumulate((task_res.first)->second.get(),
                                            op.getIndexPatternReduced(),
                                            tens1,tens2,
@@ -720,6 +721,11 @@ int TalshNodeExecutor::execute(numerics::TensorOpContract & op,
  double flop_count = talsh_submitted_flops_.load() + op.getFlopEstimate() * tensorElementTypeOpFactor(tensor1.getElementType());
  talsh_submitted_flops_.store(flop_count);
 }
 /*if(talshDeviceBufferFreeSize(0,DEV_HOST) < host_buf_free_mem){ //debug
  std::cout << "#FATAL(exatn::runtime::TalshNodeExecutor): Host buffer leak detected for tensor contraction:\n";
  op.printIt();
  std::abort();
 }*/
 return error_code;
}

+27 −3
Original line number Diff line number Diff line
/** ExaTN: Error handling
REVISION: 2021/09/27
REVISION: 2022/01/26

Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/

#ifndef EXATN_ERRORS_HPP_
#define EXATN_ERRORS_HPP_

#ifndef NO_LINUX
#include <execinfo.h> //Linux only
#include <stdio.h>
#include <stdlib.h> //free(): releases the array returned by backtrace_symbols()
#endif

#include <iostream>
#include <string>

@@ -57,6 +62,25 @@ inline void print_variadic_pack(Arg&& arg, Args&&... args)
 return print_variadic_pack(std::forward<Args>(args)...);
}

#ifndef NO_LINUX
/** Prints the current call stack to stdout (Linux only).
    Uses backtrace()/backtrace_symbols() from <execinfo.h>. The symbol
    array returned by backtrace_symbols() is allocated with malloc and
    is released here, so this function itself does not leak. **/
inline void print_backtrace() //Linux only
{
 //Compile-time bound: a non-const bound would make `addresses` a
 //variable-length array, which is a GCC extension, not standard C++:
 constexpr int MAX_CALLSTACK_DEPTH = 256;
 void * addresses[MAX_CALLSTACK_DEPTH];
 const int callstack_depth = backtrace(addresses,MAX_CALLSTACK_DEPTH);
 char ** funcs = backtrace_symbols(addresses,callstack_depth);
 if(funcs != nullptr){ //backtrace_symbols() returns NULL on allocation failure
  for(int i = 0; i < callstack_depth; ++i){
   printf("%s\n",funcs[i]);
  }
  free(funcs); //the entire symbol array is one malloc'ed block
 }
 return;
}
#endif

} //namespace exatn

#endif //EXATN_ERRORS_HPP_
Loading