Commit b19ec1eb authored by Taewook Oh's avatar Taewook Oh Committed by taewookoh
Browse files

[BPI] Improve unreachable/ColdCall heurstics to handle loops.

Summary:
While updatePostDominatedByUnreachable attemps to find basic blocks that are post-domianted by unreachable blocks, it currently cannot handle loops precisely, because it doesn't use the actual post dominator tree analysis but relies on heuristics of visiting basic blocks in post-order. More precisely, when the entire loop is post-dominated by the unreachable block, current algorithm fails to detect the entire loop as post-dominated by the unreachable because when the algorithm reaches to the loop latch it fails to tell all its successors (including the loop header) will "eventually" be post-domianted by the unreachable block, because the algorithm hasn't visited the loop header yet. This makes BPI for the loop latch to assume that loop backedges are taken with 100% of probability. And because of this, block frequency info sometimes marks virtually dead loops (which are post dominated by unreachable blocks) super hot, because 100% backedge-taken probability makes the loop iteration count the max value. updatePostDominatedByColdCall has the exact same problem as well.

To address this problem, this patch makes PostDominatedByUnreachable/PostDominatedByColdCall to be computed with the actual post-dominator tree.

Reviewers: skatkov, chandlerc, manmanren

Reviewed By: skatkov

Subscribers: manmanren, vsk, apilipenko, Carrot, qcolombet, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70104
parent b208088a
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ namespace llvm {
class Function;
class LoopInfo;
class raw_ostream;
class PostDominatorTree;
class TargetLibraryInfo;
class Value;

@@ -187,8 +188,10 @@ private:
  /// Track the set of blocks that always lead to a cold call.
  SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;

  void updatePostDominatedByUnreachable(const BasicBlock *BB);
  void updatePostDominatedByColdCall(const BasicBlock *BB);
  void computePostDominatedByUnreachable(const Function &F,
                                         PostDominatorTree *PDT);
  void computePostDominatedByColdCall(const Function &F,
                                      PostDominatorTree *PDT);
  bool calcUnreachableHeuristics(const BasicBlock *BB);
  bool calcMetadataWeights(const BasicBlock *BB);
  bool calcColdCallHeuristics(const BasicBlock *BB);
+75 −57
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -146,68 +147,82 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;

/// Add \p BB to PostDominatedByUnreachable set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
  const Instruction *TI = BB->getTerminator();
static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
                              SmallVectorImpl<const BasicBlock *> &WorkList,
                              SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
  SmallVector<BasicBlock *, 8> Descendants;
  SmallPtrSet<const BasicBlock *, 16> NewItems;

  PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
  for (auto *BB : Descendants)
    if (TargetSet.insert(BB).second)
      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
        if (!TargetSet.count(*PI))
          NewItems.insert(*PI);
  WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
}

/// Compute a set of basic blocks that are post-dominated by unreachables.
void BranchProbabilityInfo::computePostDominatedByUnreachable(
    const Function &F, PostDominatorTree *PDT) {
  SmallVector<const BasicBlock *, 8> WorkList;
  for (auto &BB : F) {
    const Instruction *TI = BB.getTerminator();
    if (TI->getNumSuccessors() == 0) {
      if (isa<UnreachableInst>(TI) ||
          // If this block is terminated by a call to
        // @llvm.experimental.deoptimize then treat it like an unreachable since
        // the @llvm.experimental.deoptimize call is expected to practically
        // never execute.
        BB->getTerminatingDeoptimizeCall())
      PostDominatedByUnreachable.insert(BB);
    return;
          // @llvm.experimental.deoptimize then treat it like an unreachable
          // since the @llvm.experimental.deoptimize call is expected to
          // practically never execute.
          BB.getTerminatingDeoptimizeCall())
        UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
    }
  }

  // If the terminator is an InvokeInst, check only the normal destination block
  // as the unwind edge of InvokeInst is also very unlikely taken.
  if (auto *II = dyn_cast<InvokeInst>(TI)) {
  while (!WorkList.empty()) {
    const BasicBlock *BB = WorkList.pop_back_val();
    if (PostDominatedByUnreachable.count(BB))
      continue;
    // If the terminator is an InvokeInst, check only the normal destination
    // block as the unwind edge of InvokeInst is also very unlikely taken.
    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      if (PostDominatedByUnreachable.count(II->getNormalDest()))
      PostDominatedByUnreachable.insert(BB);
    return;
        UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
    }
    // If all the successors are unreachable, BB is unreachable as well.
    else if (!successors(BB).empty() &&
             llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
               return PostDominatedByUnreachable.count(Succ);
             }))
      UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
  }

  for (auto *I : successors(BB))
    // If any of successor is not post dominated then BB is also not.
    if (!PostDominatedByUnreachable.count(I))
      return;

  PostDominatedByUnreachable.insert(BB);
}

/// Add \p BB to PostDominatedByColdCall set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
  assert(!PostDominatedByColdCall.count(BB));
  const Instruction *TI = BB->getTerminator();
  if (TI->getNumSuccessors() == 0)
    return;
/// compute a set of basic blocks that are post-dominated by ColdCalls.
void BranchProbabilityInfo::computePostDominatedByColdCall(
    const Function &F, PostDominatorTree *PDT) {
  SmallVector<const BasicBlock *, 8> WorkList;
  for (auto &BB : F)
    for (auto &I : BB)
      if (const CallInst *CI = dyn_cast<CallInst>(&I))
        if (CI->hasFnAttr(Attribute::Cold))
          UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);

  // If all of successor are post dominated then BB is also done.
  if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
        return PostDominatedByColdCall.count(SuccBB);
      })) {
    PostDominatedByColdCall.insert(BB);
    return;
  }
  while (!WorkList.empty()) {
    const BasicBlock *BB = WorkList.pop_back_val();

    // If the terminator is an InvokeInst, check only the normal destination
    // block as the unwind edge of InvokeInst is also very unlikely taken.
  if (auto *II = dyn_cast<InvokeInst>(TI))
    if (PostDominatedByColdCall.count(II->getNormalDest())) {
      PostDominatedByColdCall.insert(BB);
      return;
    if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      if (PostDominatedByColdCall.count(II->getNormalDest()))
        UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
    }

  // Otherwise, if the block itself contains a cold function, add it to the
  // set of blocks post-dominated by a cold call.
  for (auto &I : *BB)
    if (const CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->hasFnAttr(Attribute::Cold)) {
        PostDominatedByColdCall.insert(BB);
        return;
    // If all of successor are post dominated then BB is also done.
    else if (!successors(BB).empty() &&
             llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
               return PostDominatedByColdCall.count(Succ);
             }))
      UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
  }
}

@@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
    LLVM_DEBUG(dbgs() << "\n");
  }

  std::unique_ptr<PostDominatorTree> PDT =
      std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
  computePostDominatedByUnreachable(F, PDT.get());
  computePostDominatedByColdCall(F, PDT.get());

  // Walk the basic blocks in post-order so that we can build up state about
  // the successors of a block iteratively.
  for (auto BB : post_order(&F.getEntryBlock())) {
    LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
                      << "\n");
    updatePostDominatedByUnreachable(BB);
    updatePostDominatedByColdCall(BB);
    // If there is no at least two successors, no sense to set probability.
    if (BB->getTerminator()->getNumSuccessors() < 2)
      continue;
+18 −0
Original line number Diff line number Diff line
@@ -141,6 +141,24 @@ exit:
  ret i32 %result
}

define i32 @test_cold_loop(i32 %a, i32 %b) {
entry:
  %cond1 = icmp eq i32 %a, 42
  br i1 %cond1, label %header, label %exit

header:
  br label %body

body:
  %cond2 = icmp eq i32 %b, 42
  br i1 %cond2, label %header, label %exit
; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%

exit:
  call void @coldfunc()
  ret i32 %b
}

declare i32 @regular_function(i32 %i)

define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) {
+26 −0
Original line number Diff line number Diff line
@@ -79,6 +79,32 @@ exit:
  ret i32 %b
}

define i32 @test4(i32 %a, i32 %b) {
; CHECK: Printing analysis {{.*}} for function 'test4'
; Make sure we handle loops post-dominated by unreachables.
entry:
  %cond1 = icmp eq i32 %a, 42
  br i1 %cond1, label %header, label %exit
; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00%
; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]

header:
  br label %body

body:
  %cond2 = icmp eq i32 %a, 42
  br i1 %cond2, label %header, label %abort
; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00%

abort:
  call void @abort() noreturn
  unreachable

exit:
  ret i32 %b
}

@_ZTIi = external global i8*

; CHECK-LABEL: throwSmallException
+2 −2
Original line number Diff line number Diff line
@@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) {
; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
; CHECK: %loop.inner2.begin
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; CHECK: %bail

entry:
Loading