Commit ad725f47 authored by Dmitry I. Lyakh's avatar Dmitry I. Lyakh
Browse files

Enabled distributed execution in the CuQuantum executor; the allreduce step and testing are still pending.

parent ed6dc07f
Loading
Loading
Loading
Loading
+69 −7
Original line number Diff line number Diff line
@@ -18,14 +18,15 @@
#include "errors.hpp"

//Test activation:
/*#define EXATN_TEST0
/*
#define EXATN_TEST0
#define EXATN_TEST1
#define EXATN_TEST2
#define EXATN_TEST3
#define EXATN_TEST4
#define EXATN_TEST5*/
#define EXATN_TEST5
#define EXATN_TEST6
/*#define EXATN_TEST7
#define EXATN_TEST7
#define EXATN_TEST8
#define EXATN_TEST9
#define EXATN_TEST10
@@ -44,13 +45,15 @@
#define EXATN_TEST23
#define EXATN_TEST24
#define EXATN_TEST25
#define EXATN_TEST26*/
#define EXATN_TEST26
//#define EXATN_TEST27 //requires input file from source
//#define EXATN_TEST28 //requires input file from source
//#define EXATN_TEST29
//#define EXATN_TEST30
#define EXATN_TEST29
#define EXATN_TEST30
//#define EXATN_TEST31 //requires input file from source
//#define EXATN_TEST32
*/
#define EXATN_TEST32
//#define EXATN_TEST33


#ifdef EXATN_TEST0
@@ -3774,6 +3777,65 @@ TEST(NumServerTester, ExcitedMCVQE) {
#endif

#ifdef EXATN_TEST32
TEST(NumServerTester, CuTensorNet) {
 using exatn::TensorShape;
 using exatn::TensorSignature;
 using exatn::Tensor;
 using exatn::TensorNetwork;
 using exatn::TensorExpansion;
 using exatn::TensorOperator;
 using exatn::TensorElementType;
 using exatn::TensorRange;

 //Benchmark configuration: element type and number of timed repetitions:
 const auto TENS_ELEM_TYPE = TensorElementType::REAL32;
 const int NUM_REPEATS = 3;

 //exatn::resetLoggingLevel(1,2); //debug

 bool done = true;

 //Allocate the four tensors participating in the network:
 done = exatn::createTensor("A",TENS_ELEM_TYPE,TensorShape{96,64,64,96}); assert(done);
 done = exatn::createTensor("B",TENS_ELEM_TYPE,TensorShape{96,64,64}); assert(done);
 done = exatn::createTensor("C",TENS_ELEM_TYPE,TensorShape{64,96,64}); assert(done);
 done = exatn::createTensor("D",TENS_ELEM_TYPE,TensorShape{96,64,96,64}); assert(done);

 //Fill inputs with random values; zero out the output tensor:
 done = exatn::initTensorRnd("A"); assert(done);
 done = exatn::initTensorRnd("B"); assert(done);
 done = exatn::initTensorRnd("C"); assert(done);
 done = exatn::initTensor("D",0.0); assert(done);

 //Contract the tensor network repeatedly, reporting Gflop/s for each pass:
 for(int repeat = 0; repeat < NUM_REPEATS; ++repeat){
  done = exatn::sync(); assert(done); //drain prior work so the timing window is clean
  std::cout << "D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y): ";
  const auto flops_before = exatn::getTotalFlopCount();
  const auto time_start = exatn::Timer::timeInSecHR();
  done = exatn::evaluateTensorNetworkSync("cuNet","D(m,x,n,y)+=A(m,h,k,n)*B(u,k,h)*C(x,u,y)");
  assert(done);
  const auto duration = exatn::Timer::timeInSecHR(time_start);
  const auto flops = exatn::getTotalFlopCount() - flops_before;
  std::cout << "Performance = " << (flops / (1e9 * duration)) << " Gflop/s" << std::endl;
 }

 //Release the tensors (reverse order of creation):
 done = exatn::sync(); assert(done);
 done = exatn::destroyTensor("D"); assert(done);
 done = exatn::destroyTensor("C"); assert(done);
 done = exatn::destroyTensor("B"); assert(done);
 done = exatn::destroyTensor("A"); assert(done);

 //Final synchronization and logging reset:
 done = exatn::sync(); assert(done);
 exatn::resetLoggingLevel(0,0);
 //Grab a beer!
}
#endif

#ifdef EXATN_TEST33
TEST(NumServerTester, TensorComposite) {
 using exatn::TensorShape;
 using exatn::TensorSignature;
+7 −4
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2022/01/05
REVISION: 2022/01/06

Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
@@ -108,8 +108,11 @@ struct TensorNetworkReq {
};


CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func, unsigned int pipeline_depth):
 tensor_data_access_func_(std::move(tensor_data_access_func)), pipe_depth_(pipeline_depth)
CuQuantumExecutor::CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
                                     unsigned int pipeline_depth,
                                     unsigned int process_rank, unsigned int num_processes):
 tensor_data_access_func_(std::move(tensor_data_access_func)),
 pipe_depth_(pipeline_depth), process_rank_(process_rank), num_processes_(num_processes)
{
 static_assert(std::is_same<cutensornetHandle_t,void*>::value,"#FATAL(exatn::runtime::CuQuantumExecutor): cutensornetHandle_t != (void*)");

@@ -452,7 +455,7 @@ void CuQuantumExecutor::contractTensorNetwork(std::shared_ptr<TensorNetworkReq>
                                                                   &num_slices,sizeof(num_slices)));
  assert(num_slices > 0);
  HANDLE_CUDA_ERROR(cudaEventRecord(tn_req->compute_start,tn_req->stream));
  for(int64_t slice_id = 0; slice_id < num_slices; ++slice_id){
  for(int64_t slice_id = process_rank_; slice_id < num_slices; slice_id += num_processes_){
   HANDLE_CTN_ERROR(cutensornetContraction(gpu_attr_[gpu].second.cutn_handle,
                                           tn_req->comp_plan,
                                           tn_req->data_in,tn_req->data_out,
+8 −2
Original line number Diff line number Diff line
/** ExaTN: Tensor Runtime: Tensor network executor: NVIDIA cuQuantum
REVISION: 2022/01/05
REVISION: 2022/01/06

Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
@@ -41,7 +41,9 @@ class CuQuantumExecutor {
public:

 CuQuantumExecutor(TensorImplFunc tensor_data_access_func,
                   unsigned int pipeline_depth);
                   unsigned int pipeline_depth,
                   unsigned int process_rank,
                   unsigned int num_processes);

 CuQuantumExecutor(const CuQuantumExecutor &) = delete;
 CuQuantumExecutor & operator=(CuQuantumExecutor &) = delete;
@@ -98,6 +100,10 @@ protected:
 TensorImplFunc tensor_data_access_func_; //numerics::Tensor --> {tensor_body_ptr, size_in_bytes}
 /** Pipeline depth **/
 const unsigned int pipe_depth_;
 /** Process rank **/
 const unsigned int process_rank_;
 /** Total number of parallel processes **/
 const unsigned int num_processes_;
};

} //namespace runtime
+6 −3
Original line number Diff line number Diff line
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2022/01/05
REVISION: 2022/01/06

Copyright (C) 2018-2022 Dmitry Lyakh
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
@@ -25,10 +25,11 @@ namespace runtime {

void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
                                          const ParamConf & parameters,
                                          unsigned int num_processes,
                                          unsigned int process_rank,
                                          unsigned int global_process_rank)
{
  TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,process_rank,global_process_rank);
  TensorGraphExecutor::resetNodeExecutor(node_executor,parameters,num_processes,process_rank,global_process_rank);
#ifdef CUQUANTUM
  if(node_executor){
    cuquantum_executor_ = std::make_shared<CuQuantumExecutor>(
@@ -36,7 +37,9 @@ void LazyGraphExecutor::resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> no
        void * data_ptr = this->node_executor_->getTensorImage(tensor,device_kind,device_id,size);
        return data_ptr;
      },
      CUQUANTUM_PIPELINE_DEPTH
      CUQUANTUM_PIPELINE_DEPTH,
      process_rank,
      num_processes
    );
  }
#endif
+2 −1
Original line number Diff line number Diff line
/** ExaTN:: Tensor Runtime: Tensor graph executor: Lazy
REVISION: 2022/01/05
REVISION: 2022/01/06

Copyright (C) 2018-2022 Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2022 Oak Ridge National Laboratory (UT-Battelle)
@@ -45,6 +45,7 @@ public:
  /** Sets/resets the DAG node executor (tensor operation executor). **/
  virtual void resetNodeExecutor(std::shared_ptr<TensorNodeExecutor> node_executor,
                                 const ParamConf & parameters,
                                 unsigned int num_processes,
                                 unsigned int process_rank,
                                 unsigned int global_process_rank) override;

Loading