/** ExaTN:: Tensor Runtime: Tensor graph executor: Eager
REVISION: 2021/12/22

Copyright (C) 2018-2021 Tiffany Mintz, Dmitry Lyakh, Alex McCaskey
Copyright (C) 2018-2021 Oak Ridge National Laboratory (UT-Battelle)
**/

#include "graph_executor_eager.hpp"

#include "talshxx.hpp"

#include <iostream>
#include <iomanip>

#include "errors.hpp"

namespace exatn {
namespace runtime {
/** Eagerly executes the tensor operation DAG front-to-back:
    each not-yet-executed node is submitted to the node executor and
    immediately synchronized upon (one operation in flight at a time)
    before moving to the next node. The total node count is re-read on
    every iteration, so nodes appended to the DAG during execution are
    picked up as well. Returns once all registered nodes are executed. **/
void EagerGraphExecutor::execute(TensorGraph & dag) {
  auto num_nodes = dag.getNumNodes();
  auto current = dag.getFrontNode(); //first node not yet executed
  while(current < num_nodes){
    TensorOpExecHandle exec_handle;
    auto & dag_node = dag.getNodeProperties(current);
    if(!(dag_node.isExecuted())){
      dag.setNodeExecuting(current); //mark the node as being executed
      auto op = dag_node.getOperation();
      if(logging_.load() != 0){
        logfile_ << "[" << std::fixed << std::setprecision(6) << exatn::Timer::timeInSecHR(getTimeStampStart())
                 << "](EagerGraphExecutor)[EXEC_THREAD]: Submitting tensor operation "
                 << current << ": Opcode = " << static_cast<int>(op->getOpcode()); //debug
        if(logging_.load() > 1){
          logfile_ << ": Details:" << std::endl;
          op->printItFile(logfile_);
        }
      }
      op->recordStartTime();
      //Submit the tensor operation to the node executor (visitor dispatch):
      auto error_code = op->accept(*node_executor_,&exec_handle);
      if(logging_.load() != 0){
        logfile_ << ": Status = " << error_code << ": "; //debug
      }
      if(error_code == 0){ //submission succeeded: immediately sync on completion
        if(logging_.load() != 0){
          logfile_ << "Syncing ... "; //debug
        }
        //Third argument true presumably requests a blocking wait — confirm against NodeExecutor API:
        auto synced = node_executor_->sync(exec_handle,&error_code,true);
        op->recordFinishTime();
        if(synced && error_code == 0){ //operation completed successfully
          dag.setNodeExecuted(current);
          if(logging_.load() != 0){
            logfile_ << "Success [" << std::fixed << std::setprecision(6)
                     << exatn::Timer::timeInSecHR(getTimeStampStart()) << "]" << std::endl; //debug
            //logfile_.flush();
          }
          dag.progressFrontNode(current); //advance the DAG execution front
          ++current;
        }else{ //failed to synchronize the submitted tensor operation
          node_executor_->discard(exec_handle);
          //Record the error status in the DAG node (only when an explicit error code was returned):
          if(error_code != 0) dag.setNodeExecuted(current,error_code);
          if(logging_.load() != 0){
            logfile_ << "Failed to synchronize tensor operation: Error " << error_code << std::endl;
            //logfile_.flush();
          }
          std::cout << "#ERROR(exatn::TensorRuntime::GraphExecutorEager): Failed to synchronize tensor operation: Error "
                    << error_code << std::endl << std::flush;
          assert(false); //sync failure is fatal for the eager executor (no-op under NDEBUG)
        }
      }else{ //failed to submit the tensor operation
        node_executor_->discard(exec_handle);
        dag.setNodeIdle(current); //roll the node back to idle so it can be resubmitted
        //TRY_LATER/DEVICE_UNABLE are treated as transient (node retried on the next
        //loop iteration since `current` is not advanced); any other code is fatal:
        if(error_code != TRY_LATER && error_code != DEVICE_UNABLE){
         std::cout << "#ERROR(exatn::TensorRuntime::GraphExecutorEager): Failed to submit tensor operation: Error "
                   << error_code << std::endl << std::flush;
         assert(false);
        }
        if(logging_.load() != 0){
          logfile_ << "Will retry again" << std::endl; //debug
          //logfile_.flush();
        }
      }
    }else{ //node already executed: just advance past it
      ++current;
    }
    num_nodes = dag.getNumNodes(); //refresh: the DAG may have grown meanwhile
  }
  return;
}
/** Whole-tensor-network processing is not supported by the eager executor:
    reports a fatal error and aborts (assert; no-op under NDEBUG). **/
void EagerGraphExecutor::execute(TensorNetworkQueue & tensor_network_queue) {
  static const char * const fatal_msg =
   "#FATAL(exatn::runtime::EagerGraphExecutor::execute): Processing of entire tensor networks is not implemented!\n";
  std::cout << fatal_msg;
  assert(false);
}
} //namespace runtime
} //namespace exatn