Commit 2dd82a1c authored by Bardia Mahjour's avatar Bardia Mahjour
Browse files

[DDG] Data Dependence Graph - Topological Sort (Memory Leak Fix)

Summary:
This fixes the memory leak in bec37c3f
and re-delivers the reverted patch.
In this patch the DDG DAG is sorted topologically to put the
nodes in the graph in the order that would satisfy all
dependencies. This helps transformations that would like to
generate code based on the DDG. Since the DDG is a DAG a
reverse-post-order traversal would give us the topological
ordering. This patch also sorts the basic blocks passed to
the builder based on program order to ensure that the
dependencies are computed in the correct direction.

Authored By: bmahjour

Reviewer: Meinersbur, fhahn, myhsu, xtian, dmgreen, kbarton, jdoerfert

Reviewed By: Meinersbur

Subscribers: ychen, arphaman, simoll, a.elovikov, mgorny, hiraditya, jfb, wuzish, llvm-commits, jsji, Whitney, etiotto, ppc-slack

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70609
parent 970d9719
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -300,6 +300,7 @@ using DDGInfo = DependenceGraphInfo<DDGNode>;

/// Data Dependency Graph
class DataDependenceGraph : public DDGBase, public DDGInfo {
  friend AbstractDependenceGraphBuilder<DataDependenceGraph>;
  friend class DDGBuilder;

public:
@@ -311,7 +312,7 @@ public:
  DataDependenceGraph(DataDependenceGraph &&G)
      : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
  DataDependenceGraph(Function &F, DependenceInfo &DI);
  DataDependenceGraph(const Loop &L, DependenceInfo &DI);
  DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
  ~DataDependenceGraph();

  /// If node \p N belongs to a pi-block return a pointer to the pi-block,
@@ -381,6 +382,12 @@ public:
    return *E;
  }

  const NodeListType &getNodesInPiBlock(const DDGNode &N) final override {
    auto *PiNode = dyn_cast<const PiBlockDDGNode>(&N);
    assert(PiNode && "Expected a pi-block node.");
    return PiNode->getNodes();
  }

  bool shouldCreatePiBlocks() const final override;
};

+8 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ public:
    createMemoryDependencyEdges();
    createAndConnectRootNode();
    createPiBlocks();
    sortNodesTopologically();
  }

  /// Create fine grained nodes. These are typically atomic nodes that
@@ -84,6 +85,9 @@ public:
  /// the dependence graph into an acyclic graph.
  void createPiBlocks();

  /// Topologically sort the graph nodes.
  void sortNodesTopologically();

protected:
  /// Create the root node of the graph.
  virtual NodeType &createRootNode() = 0;
@@ -104,6 +108,10 @@ protected:
  /// Create a rooted edge going from \p Src to \p Tgt .
  virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;

  /// Given a pi-block node, return a vector of all the nodes contained within
  /// it.
  virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0;

  /// Deallocate memory of edge \p E.
  virtual void destroyEdge(EdgeType &E) { delete &E; }

+16 −5
Original line number Diff line number Diff line
@@ -9,7 +9,9 @@
// The implementation for the data dependence graph.
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DDG.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
@@ -179,19 +181,28 @@ using BasicBlockListType = SmallVector<BasicBlock *, 8>;

DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
    : DependenceGraphInfo(F.getName().str(), D) {
  // Put the basic blocks in program order for correct dependence
  // directions.
  BasicBlockListType BBList;
  for (auto &BB : F.getBasicBlockList())
    BBList.push_back(&BB);
  for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
    for (BasicBlock * BB : SCC)
      BBList.push_back(BB);
  std::reverse(BBList.begin(), BBList.end());
  DDGBuilder(*this, D, BBList).populate();
}

DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
                                         DependenceInfo &D)
    : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
                                L.getHeader()->getName())
                              .str(),
                          D) {
  // Put the basic blocks in program order for correct dependence
  // directions.
  LoopBlocksDFS DFS(&L);
  DFS.perform(&LI);
  BasicBlockListType BBList;
  for (BasicBlock *BB : L.blocks())
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    BBList.push_back(BB);
  DDGBuilder(*this, D, BBList).populate();
}
@@ -259,7 +270,7 @@ DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                     LoopStandardAnalysisResults &AR) {
  Function *F = L.getHeader()->getParent();
  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
  return std::make_unique<DataDependenceGraph>(L, DI);
  return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
}
AnalysisKey DDGAnalysis::Key;

+29 −0
Original line number Diff line number Diff line
@@ -353,5 +353,34 @@ void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
  }
}

template <class G>
void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {

  // If we don't create pi-blocks, then we may not have a DAG.
  if (!shouldCreatePiBlocks())
    return;

  SmallVector<NodeType *, 64> NodesInPO;
  using NodeKind = typename NodeType::NodeKind;
  for (NodeType *N : post_order(&Graph)) {
    if (N->getKind() == NodeKind::PiBlock) {
      // Put members of the pi-block right after the pi-block itself, for
      // convenience.
      const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
      NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
                       PiBlockMembers.end());
    }
    NodesInPO.push_back(N);
  }

  size_t OldSize = Graph.Nodes.size();
  Graph.Nodes.clear();
  for (NodeType *N : reverse(NodesInPO))
    Graph.Nodes.push_back(N);
  if (Graph.Nodes.size() != OldSize)
    assert(false &&
           "Expected the number of nodes to stay the same after the sort");
}

template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
template class llvm::DependenceGraphInfo<DDGNode>;
+84 −81
Original line number Diff line number Diff line
; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s

; CHECK-LABEL: 'DDG' for loop 'test1.for.body':
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction

; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11:0x[0-9a-f]*]]

; CHECK: Node Address:[[N11]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N10]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N6:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N7:0x[0-9a-f]*]]

; CHECK: Node Address:[[N7]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N1]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
@@ -23,12 +60,6 @@
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %add = fadd float %0, %conv
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N6:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5]]

; CHECK: Node Address:[[N5]]:single-instruction
@@ -36,36 +67,6 @@
; CHECK-NEXT:    store float %add, float* %arrayidx1, align 4
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test1.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK-NEXT: Node Address:[[N10:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11:0x[0-9a-f]*]]

; CHECK: Node Address:[[N11]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %test1.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N10]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1]]
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:  [def-use] to [[N7]]


;; No memory dependencies.
;; void test1(unsigned long n, float * restrict a, float * restrict b) {
@@ -96,78 +97,80 @@ for.end: ; preds = %test1.for.body, %en


; CHECK-LABEL: 'DDG' for loop 'test2.for.body':
; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N2:0x[0-9a-f]*]]

; CHECK: Node Address:[[N2]]:single-instruction
; CHECK: Node Address:[[PI:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %0 = load float, float* %arrayidx, align 4
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N3:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N12:0x[0-9a-f]*]]

; CHECK: Node Address:[[N4:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N12]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N11]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N4:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N7:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]

; CHECK: Node Address:[[N5]]:single-instruction
; CHECK: Node Address:[[N8]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %1 = load float, float* %arrayidx1, align 4
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N3]]
; CHECK-NEXT:  [memory] to [[N6:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N9:0x[0-9a-f]*]]

; CHECK: Node Address:[[N3]]:single-instruction
; CHECK: Node Address:[[N9]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %add = fadd float %0, %1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:    br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!

; CHECK: Node Address:[[N7:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N7]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N6]]
; CHECK-NEXT:  [def-use] to [[N6:0x[0-9a-f]*]]

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK: Node Address:[[N4]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    store float %add, float* %arrayidx2, align 4
; CHECK-NEXT: Edges:none!
; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N5:0x[0-9a-f]*]]

; CHECK: Node Address:[[N8:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N5]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %exitcond = icmp ne i64 %inc, %n
; CHECK-NEXT:    %1 = load float, float* %arrayidx1, align 4
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N9:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N3:0x[0-9a-f]*]]
; CHECK-NEXT:  [memory] to [[N6]]

; CHECK: Node Address:[[N9]]:single-instruction
; CHECK: Node Address:[[N1]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    br i1 %exitcond, label %test2.for.body, label %for.end.loopexit
; CHECK-NEXT: Edges:none!
; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N2:0x[0-9a-f]*]]

; CHECK: Node Address:[[N10:0x[0-9a-f]*]]:pi-block
; CHECK-NEXT: --- start of nodes in pi-block ---
; CHECK: Node Address:[[N11:0x[0-9a-f]*]]:single-instruction
; CHECK: Node Address:[[N2]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %inc = add i64 %i.02, 1
; CHECK-NEXT:    %0 = load float, float* %arrayidx, align 4
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N12:0x[0-9a-f]*]]
; CHECK-NEXT:  [def-use] to [[N3]]

; CHECK: Node Address:[[N12]]:single-instruction
; CHECK: Node Address:[[N3]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %test2.for.body.preheader ]
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N11]]
; CHECK-NEXT: --- end of nodes in pi-block ---
; CHECK-NEXT:    %add = fadd float %0, %1
; CHECK-NEXT: Edges:
; CHECK-NEXT:  [def-use] to [[N1]]
; CHECK-NEXT:  [def-use] to [[N4]]
; CHECK-NEXT:  [def-use] to [[N7]]
; CHECK-NEXT:  [def-use] to [[N8]]
; CHECK-NEXT:  [def-use] to [[N6]]

; CHECK: Node Address:[[N6]]:single-instruction
; CHECK-NEXT: Instructions:
; CHECK-NEXT:    store float %add, float* %arrayidx2, align 4
; CHECK-NEXT: Edges:none!



;; Loop-independent memory dependencies.
Loading