Commit 12b87ac9 authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Fixed a few bugs; memory corruption still present ...

parent da2a00d4
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
/** ExaTN::Numerics: General client header (free function API)
REVISION: 2022/01/07
REVISION: 2022/01/08

Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -866,12 +866,12 @@ inline bool evaluateSync(const ProcessGroup & process_group, //in: chosen group
/** Synchronizes all outstanding update operations on a given tensor specified by
    its symbolic name. If ProcessGroup is not provided, defaults to the local process.**/
inline bool sync(const std::string & name, //in: tensor name
                 bool wait = true)         //in: wait versus test for completion
                 bool wait)                //in: wait versus test for completion
 {return numericalServer->sync(name,wait);}

inline bool sync(const ProcessGroup & process_group, //in: chosen group of MPI processes
                 const std::string & name,           //in: tensor name
                 bool wait = true)                   //in: wait versus test for completion
                 bool wait)                          //in: wait versus test for completion
 {return numericalServer->sync(process_group,name,wait);}


+26 −15
Original line number Diff line number Diff line
/** ExaTN::Numerics: Numerical server
REVISION: 2022/01/07
REVISION: 2022/01/08

Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/
@@ -198,10 +198,10 @@ void NumServer::reconfigureTensorRuntime(const ParamConf & parameters,

void NumServer::switchComputationalBackend(const std::string & backend_name)
{
 bool success = tensor_rt_->sync(); assert(success);
 //bool success = sync(); assert(success);
 if(logging_ > 0 && backend_name != comp_backend_){
  logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
           << "]: Switching computational backend to " << backend_name << std::endl << std::flush;
           << "]: Switched computational backend to " << backend_name << std::endl << std::flush;
 }
 if(backend_name == "default"){
  comp_backend_ = backend_name;
@@ -210,7 +210,8 @@ void NumServer::switchComputationalBackend(const std::string & backend_name)
  comp_backend_ = backend_name;
#endif
 }else{
  std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: " << backend_name << std::endl;
  std::cout << "#ERROR(exatn::NumServer): switchComputationalBackend: Unknown backend: "
            << backend_name << std::endl << std::flush;
  std::abort();
 }
 return;
@@ -916,15 +917,16 @@ bool NumServer::submit(const ProcessGroup & process_group,
  assert(local_rank < num_procs);
  if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
                            << "]: Submitting tensor network <" << network->getName() << "> (" << network->getTensor(0)->getName()
                            << ") for execution via cuQuantum by " << num_procs << " processes with memory limit "
                            << process_group.getMemoryLimitPerProcess() << " bytes" << std::endl << std::flush;
                            << ":" << getTensorNetworkHash(network) << ") for execution via cuQuantum by " << num_procs
                            << " processes with memory limit " << process_group.getMemoryLimitPerProcess() << " bytes\n" << std::flush;
  if(logging_ > 0) network->printItFile(logfile_);
  const auto exec_handle = tensor_rt_->submit(network,process_group.getMPICommProxy(),num_procs,local_rank);
  bool success = (exec_handle != 0);
  if(success){
   auto res = tn_exec_handles_.emplace(std::make_pair(network->getTensor(0)->getTensorHash(),exec_handle));
   success = res.second;
   if(success && logging_ > 0) logfile_ << "Number of submitted networks via cuQuantum = 1" << std::endl << std::flush;
   if(success && logging_ > 0) logfile_ << "Execution handle of the submitted network via cuQuantum is "
                                        << exec_handle << std::endl << std::flush;
  }
  return success;
 }
@@ -1076,16 +1078,25 @@ bool NumServer::sync(const ProcessGroup & process_group, const Tensor & tensor,
{
 bool success = true;
 if(!process_group.rankIsIn(process_rank_)) return success; //process is not in the group: Do nothing

 auto iter = tensors_.find(tensor.getName());
 if(iter != tensors_.end()){
#ifdef CUQUANTUM
  if(comp_backend_ == "cuquantum"){
  auto iter = tn_exec_handles_.find(tensor.getTensorHash());
  bool synced = (iter == tn_exec_handles_.end());
  if(!synced) synced = tensor_rt_->syncNetwork(iter->second,wait);
  return synced;
   auto cuter = tn_exec_handles_.find(iter->second->getTensorHash());
   success = (cuter == tn_exec_handles_.end());
   if(!success){
    success = tensor_rt_->syncNetwork(cuter->second,wait);
    if(success){
     if(logging_ > 0) logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
      << "]: Locally synchronized cuQuantum execution handle " << cuter->second << " via tensor <" << tensor.getName() << ">"
      << std::endl << std::flush;
     tn_exec_handles_.erase(cuter);
    }
   }
   return success;
  }
#endif
 auto iter = tensors_.find(tensor.getName());
 if(iter != tensors_.end()){
  if(iter->second->isComposite()){
   auto composite_tensor = castTensorComposite(iter->second); assert(composite_tensor);
   for(auto subtens = composite_tensor->begin(); subtens != composite_tensor->end(); ++subtens){
+8 −7
Original line number Diff line number Diff line
@@ -3791,7 +3791,7 @@ TEST(NumServerTester, CuTensorNet) {

 const int NUM_REPEATS = 1;

 exatn::resetLoggingLevel(1,2); //debug
 exatn::resetLoggingLevel(2,2); //debug

 bool success = true;

@@ -3807,23 +3807,24 @@ TEST(NumServerTester, CuTensorNet) {
 success = exatn::initTensorRnd("C"); assert(success);
 success = exatn::initTensor("D",0.0); assert(success);

 exatn::switchComputationalBackend("default");
 success = exatn::sync(); assert(success);
 exatn::switchComputationalBackend("cuquantum");

 //Contract tensor network:
 int num_repeats = NUM_REPEATS;
 while(--num_repeats >= 0){
  success = exatn::sync(); assert(success);
  std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): ";
  auto flops = exatn::getTotalFlopCount();
  auto time_start = exatn::Timer::timeInSecHR();
  success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)");
  assert(success);
  success = exatn::sync("D"); assert(success);
  success = exatn::evaluateTensorNetwork("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)"); assert(success);
  success = exatn::sync("D",true); assert(success);
  auto duration = exatn::Timer::timeInSecHR(time_start);
  flops = exatn::getTotalFlopCount() - flops;
  std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl;
  std::cout << "Duration = " << duration << " s; Performance = " << (flops / (1e9 * duration)) << " Gflop/s\n";
 }

 //std::this_thread::sleep_for(std::chrono::microseconds(1000000));

 //Destroy tensors:
 success = exatn::sync(); assert(success);
 success = exatn::destroyTensor("D"); assert(success);
+19 −19
Original line number Diff line number Diff line
/** ExaTN: Tensor basic types and parameters
REVISION: 2021/10/15
REVISION: 2022/01/07

Copyright (C) 2018-2021 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle) **/
Copyright (C) 2018-2022 Dmitry I. Lyakh (Liakh)
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle) **/

#ifndef EXATN_NUMERICS_TENSOR_BASIC_HPP_
#define EXATN_NUMERICS_TENSOR_BASIC_HPP_
@@ -63,22 +63,22 @@ enum class IndexKind{

//Basic tensor operations:
enum class TensorOpCode{
 NOOP,              //no operation
 CREATE,            //tensor creation
 DESTROY,           //tensor destruction
 TRANSFORM,         //tensor transformation/initialization
 SLICE,             //tensor slicing
 INSERT,            //tensor insertion
 ADD,               //tensor addition
 CONTRACT,          //tensor contraction
 DECOMPOSE_SVD3,    //tensor decomposition via SVD into three tensor factors
 DECOMPOSE_SVD2,    //tensor decomposition via SVD into two tensor factors
 ORTHOGONALIZE_SVD, //tensor orthogonalization via SVD
 ORTHOGONALIZE_MGS, //tensor orthogonalization via Modified Gram-Schmidt
 FETCH,             //fetch tensor data from another MPI process (parallel execution only)
 UPLOAD,            //upload tensor data to another MPI process (parallel execution only)
 BROADCAST,         //tensor broadcast (parallel execution only)
 ALLREDUCE          //tensor allreduce (parallel execution only)
 NOOP,              //0: no operation
 CREATE,            //1: tensor creation
 DESTROY,           //2: tensor destruction
 TRANSFORM,         //3: tensor transformation/initialization
 SLICE,             //4: tensor slicing
 INSERT,            //5: tensor insertion
 ADD,               //6: tensor addition
 CONTRACT,          //7: tensor contraction
 DECOMPOSE_SVD3,    //8: tensor decomposition via SVD into three tensor factors
 DECOMPOSE_SVD2,    //9: tensor decomposition via SVD into two tensor factors
 ORTHOGONALIZE_SVD, //10: tensor orthogonalization via SVD
 ORTHOGONALIZE_MGS, //11: tensor orthogonalization via Modified Gram-Schmidt
 FETCH,             //12: fetch tensor data from another MPI process (parallel execution only)
 UPLOAD,            //13: upload tensor data to another MPI process (parallel execution only)
 BROADCAST,         //14: tensor broadcast (parallel execution only)
 ALLREDUCE          //15: tensor allreduce (parallel execution only)
};


+3 −2
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2022/01/07
REVISION: 2022/01/08

Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
@@ -114,7 +114,7 @@ CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
                                     unsigned int pipeline_depth,
                                     unsigned int num_processes, unsigned int process_rank):
 tensor_data_access_func_(std::move(tensor_data_access_func)),
 pipe_depth_(pipeline_depth), num_processes_(num_processes), process_rank_(process_rank)
 pipe_depth_(pipeline_depth), num_processes_(num_processes), process_rank_(process_rank), flops_(0.0)
{
 static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)");

@@ -442,6 +442,7 @@ void CuQuantumExecutor::planExecution(std::shared_ptr<TensorNetworkReq> tn_req)
                                                                   tn_req->opt_info,
                                                                   CUTENSORNET_CONTRACTION_OPTIMIZER_INFO_FLOP_COUNT,
                                                                   &flops,sizeof(flops)));
  flops_ += flops;
 }
 tn_req->exec_status = TensorNetworkQueue::ExecStat::Planning;
 return;
Loading