Commit bec37c3f authored by bmahjour's avatar bmahjour
Browse files

[DDG] Data Dependence Graph - Topological Sort

Summary:
In this patch the DDG DAG is sorted topologically to put the
nodes in the graph in the order that would satisfy all
dependencies. This helps transformations that would like to
generate code based on the DDG. Since the DDG is a DAG, a
reverse-post-order traversal would give us the topological
ordering. This patch also sorts the basic blocks passed to
the builder based on program order to ensure that the
dependencies are computed in the correct direction.

Authored By: bmahjour

Reviewer: Meinersbur, fhahn, myhsu, xtian, dmgreen, kbarton, jdoerfert

Reviewed By: Meinersbur

Subscribers: ychen, arphaman, simoll, a.elovikov, mgorny, hiraditya, jfb, wuzish, llvm-commits, jsji, Whitney, etiotto, ppc-slack

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70609
parent fc31b58e
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -300,6 +300,7 @@ using DDGInfo = DependenceGraphInfo<DDGNode>;

/// Data Dependency Graph
class DataDependenceGraph : public DDGBase, public DDGInfo {
  friend AbstractDependenceGraphBuilder<DataDependenceGraph>;
  friend class DDGBuilder;

public:
@@ -311,7 +312,7 @@ public:
  DataDependenceGraph(DataDependenceGraph &&G)
      : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
  DataDependenceGraph(Function &F, DependenceInfo &DI);
  DataDependenceGraph(const Loop &L, DependenceInfo &DI);
  DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
  ~DataDependenceGraph();

  /// If node \p N belongs to a pi-block return a pointer to the pi-block,
+4 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ public:
    createMemoryDependencyEdges();
    createAndConnectRootNode();
    createPiBlocks();
    sortNodesTopologically();
  }

  /// Create fine grained nodes. These are typically atomic nodes that
@@ -84,6 +85,9 @@ public:
  /// the dependence graph into an acyclic graph.
  void createPiBlocks();

  /// Topologically sort the graph nodes. The sort is skipped when pi-blocks
  /// are not created, because the graph may then not be a DAG.
  void sortNodesTopologically();

protected:
  /// Create the root node of the graph.
  virtual NodeType &createRootNode() = 0;
+16 −5
Original line number Diff line number Diff line
@@ -9,7 +9,9 @@
// The implementation for the data dependence graph.
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DDG.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
@@ -179,19 +181,28 @@ using BasicBlockListType = SmallVector<BasicBlock *, 8>;

DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
    : DependenceGraphInfo(F.getName().str(), D) {
  // Put the basic blocks in program order for correct dependence
  // directions.
  BasicBlockListType BBList;
  for (auto &BB : F.getBasicBlockList())
    BBList.push_back(&BB);
  for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
    for (BasicBlock * BB : SCC)
      BBList.push_back(BB);
  std::reverse(BBList.begin(), BBList.end());
  DDGBuilder(*this, D, BBList).populate();
}

DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
                                         DependenceInfo &D)
    : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
                                L.getHeader()->getName())
                              .str(),
                          D) {
  // Put the basic blocks in program order for correct dependence
  // directions.
  LoopBlocksDFS DFS(&L);
  DFS.perform(&LI);
  BasicBlockListType BBList;
  for (BasicBlock *BB : L.blocks())
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    BBList.push_back(BB);
  DDGBuilder(*this, D, BBList).populate();
}
@@ -259,7 +270,7 @@ DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                     LoopStandardAnalysisResults &AR) {
  Function *F = L.getHeader()->getParent();
  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
  return std::make_unique<DataDependenceGraph>(L, DI);
  return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
}
AnalysisKey DDGAnalysis::Key;

+16 −0
Original line number Diff line number Diff line
@@ -353,5 +353,21 @@ void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
  }
}

template <class G>
void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
  // Without pi-blocks the graph may not be a DAG, so there is no topological
  // order to compute; leave the node list untouched.
  if (!shouldCreatePiBlocks())
    return;

  // Record the nodes in the order a post-order walk of the graph yields them.
  SmallVector<NodeType *, 64> PostOrder;
  for (NodeType *Visited : post_order(&Graph))
    PostOrder.push_back(Visited);

  // Rebuild the node list back to front: the reverse of a post-order
  // traversal of a DAG is a topological ordering.
  Graph.Nodes.clear();
  for (auto It = PostOrder.rbegin(), End = PostOrder.rend(); It != End; ++It)
    Graph.Nodes.push_back(*It);
}

template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
template class llvm::DependenceGraphInfo<DDGNode>;
+84 −81
Original line number Diff line number Diff line
; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s

; CHECK-LABEL: 'DDG' for loop 'test1.for.body':
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction

; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11:0x[0-9a-f]*]]

; CHECK: Node Address:[[N11]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N10]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N6:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N7:0x[0-9a-f]*]]

; CHECK: Node Address:[[N7]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N1]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
@@ -23,12 +60,6 @@
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %add = fadd float %0, %conv
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N6:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5]]

; CHECK: Node Address:[[N5]]:single-instruction
@@ -36,36 +67,6 @@
; CHECK-NEXT:    store float %add, float* %arrayidx1, align 4
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11:0x[0-9a-f]*]]

; CHECK: Node Address:[[N11]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N10]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1]]
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:  [def-use] to [[N7]]


;; No memory dependencies.
;; void test1(unsigned long n, float * restrict a, float * restrict b) {
@@ -96,78 +97,80 @@ for.end: ; preds = %test1.for.body, %en


; CHECK-LABEL: 'DDG' for loop 'test2.for.body':
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N2:0x[0-9a-f]*]]

; CHECK: Node Address:[[N2]]:single-instruction
; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %0 = load float, float* %arrayidx, align 4
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N3:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N12:0x[0-9a-f]*]]

; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N12]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N11]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N4:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N7:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N5]]:single-instruction
; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %1 = load float, float* %arrayidx1, align 4
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N3]]
; CHECK-NEXT:  [memory] to [[N6:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N9:0x[0-9a-f]*]]

; CHECK: Node Address:[[N3]]:single-instruction
; CHECK: Node Address:[[N9]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %add = fadd float %0, %1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:    br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N7]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:  [def-use] to [[N6:0x[0-9a-f]*]]

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK: Node Address:[[N4]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    store float %add, float* %arrayidx2, align 4
; CHECK-NEXT: Edges:none!
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N5]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT:    %1 = load float, float* %arrayidx1, align 4
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N9:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N3:0x[0-9a-f]*]]
; CHECK-NEXT:  [memory] to [[N6]]

; CHECK: Node Address:[[N9]]:single-instruction
; CHECK: Node Address:[[N1]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N2:0x[0-9a-f]*]]

; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N2]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT:    %0 = load float, float* %arrayidx, align 4
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N12:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N3]]

; CHECK: Node Address:[[N12]]:single-instruction
; CHECK: Node Address:[[N3]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT:    %add = fadd float %0, %1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1]]
; CHECK-NEXT:  [def-use] to [[N4]]
; CHECK-NEXT:  [def-use] to [[N7]]
; CHECK-NEXT:  [def-use] to [[N8]]
; CHECK-NEXT:  [def-use] to [[N6]]

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    store float %add, float* %arrayidx2, align 4
; CHECK-NEXT: Edges:none!



;; Loop-independent memory dependencies.
Loading