Commit ead81592 authored by Kazu Hirata's avatar Kazu Hirata
Browse files

[JumpThreading] Thread jumps through two basic blocks

Summary:
This patch teaches JumpThreading.cpp to thread through two basic
blocks like:

  bb3:
    %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb4:
    %cmp = icmp eq i32* %var, null
    br i1 %cmp, label bb5, label bb6

by duplicating basic blocks like bb3 above.  Once we duplicate bb3 as
bb3.dup and redirect edge bb2->bb3 to bb2->bb3.dup, we have:

  bb3:
    %var = phi i32* [ @a, %bb2 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb3.dup:
    %var = phi i32* [ null, %bb1 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb4:
    %cmp = icmp eq i32* %var, null
    br i1 %cmp, label bb5, label bb6

Then the existing code in JumpThreading.cpp can thread edge
bb3.dup->bb4 through bb4 and eventually create bb3.dup->bb5.

Reviewers: wmi

Subscribers: hiraditya, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70247
parent dac7b23c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -139,6 +139,11 @@ public:
                                               RecursionSet, CxtI);
  }

  Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
                                      Value *cond);
  bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
  void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
                                   BasicBlock *BB, BasicBlock *SuccBB);
  bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                              jumpthreading::ConstantPreference Preference,
                              Instruction *CxtI = nullptr);
+228 −2
Original line number Diff line number Diff line
@@ -1548,6 +1548,52 @@ FindMostPopularDest(BasicBlock *BB,
  return MostPopularDest;
}

// Try to evaluate the value of V when the control flows from PredPredBB to
// BB->getSinglePredecessor() and then on to BB.
Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
                                                       BasicBlock *PredPredBB,
                                                       Value *V) {
  BasicBlock *PredBB = BB->getSinglePredecessor();
  assert(PredBB && "Expected a single predecessor");

  if (Constant *Cst = dyn_cast<Constant>(V)) {
    return Cst;
  }

  // Consult LVI if V is not an instruction in BB or PredBB.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
    if (DTU->hasPendingDomTreeUpdates())
      LVI->disableDT();
    else
      LVI->enableDT();
    return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
  }

  // Look into a PHI argument.
  if (PHINode *PHI = dyn_cast<PHINode>(V)) {
    if (PHI->getParent() == PredBB)
      return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
    return nullptr;
  }

  // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
  if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
    if (CondCmp->getParent() == BB) {
      Constant *Op0 =
          EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
      Constant *Op1 =
          EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
      if (Op0 && Op1) {
        return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
      }
    }
    return nullptr;
  }

  return nullptr;
}

bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                               ConstantPreference Preference,
                                               Instruction *CxtI) {
@@ -1557,8 +1603,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
    return false;

  PredValueInfoTy PredValues;
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
    return false;
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
                                       CxtI)) {
    // We don't have known values in predecessors.  See if we can thread through
    // BB and its sole predecessor.
    return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
  }

  assert(!PredValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");
@@ -2015,6 +2065,182 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
  return ValueMapping;
}

/// Attempt to thread through two successive basic blocks.
bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
                                                         Value *Cond) {
  // Consider:
  //
  // PredBB:
  //   %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
  //   %tobool = icmp eq i32 %cond, 0
  //   br i1 %tobool, label %BB, label ...
  //
  // BB:
  //   %cmp = icmp eq i32* %var, null
  //   br i1 %cmp, label ..., label ...
  //
  // We don't know the value of %var at BB even if we know which incoming edge
  // we take to BB.  However, once we duplicate PredBB for each of its incoming
  // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
  // PredBB.  Then we can thread edges PredBB1->BB and PredBB2->BB through BB.

  // Require that BB end with a Branch for simplicity.
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  if (!CondBr)
    return false;

  // BB must have exactly one predecessor.
  BasicBlock *PredBB = BB->getSinglePredecessor();
  if (!PredBB)
    return false;

  // Require that PredBB end with a Branch.  If PredBB ends with an
  // unconditional branch, we should be merging PredBB and BB instead.  For
  // simplicity, we don't deal with a switch.
  BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
  if (!PredBBBranch)
    return false;

  // If PredBB has exactly one incoming edge, we don't gain anything by copying
  // PredBB.
  if (PredBB->getSinglePredecessor())
    return false;

  // Don't thread across a loop header.
  if (LoopHeaders.count(PredBB))
    return false;

  // Find a predecessor that we can thread.  For simplicity, we only consider a
  // successor edge out of BB to which we thread exactly one incoming edge into
  // PredBB.
  unsigned ZeroCount = 0;
  unsigned OneCount = 0;
  BasicBlock *ZeroPred = nullptr;
  BasicBlock *OnePred = nullptr;
  for (BasicBlock *P : predecessors(PredBB)) {
    if (Constant *Cst = EvaluateOnPredecessorEdge(BB, P, Cond)) {
      if (Cst->isZeroValue()) {
        ZeroCount++;
        ZeroPred = P;
      } else {
        OneCount++;
        OnePred = P;
      }
    }
  }

  // Disregard complicated cases where we have to thread multiple edges.
  BasicBlock *PredPredBB;
  if (ZeroCount == 1) {
    PredPredBB = ZeroPred;
  } else if (OneCount == 1) {
    PredPredBB = OnePred;
  } else {
    return false;
  }

  BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);

  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    LLVM_DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
                      << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above FindLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
    LLVM_DEBUG({
      bool BBIsHeader = LoopHeaders.count(BB);
      bool SuccIsHeader = LoopHeaders.count(SuccBB);
      dbgs() << "  Not threading across "
             << (BBIsHeader ? "loop header BB '" : "block BB '")
             << BB->getName() << "' to dest "
             << (SuccIsHeader ? "loop header BB '" : "block BB '")
             << SuccBB->getName()
             << "' - it might create an irreducible loop!\n";
    });
    return false;
  }

  // Check the cost of duplicating BB and PredBB.
  unsigned JumpThreadCost =
      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
  JumpThreadCost += getJumpThreadDuplicationCost(
      PredBB, PredBB->getTerminator(), BBDupThreshold);
  if (JumpThreadCost > BBDupThreshold) {
    LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                      << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }

  // Now we are ready to duplicate PredBB.
  ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
  return true;
}

void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
                                                    BasicBlock *PredBB,
                                                    BasicBlock *BB,
                                                    BasicBlock *SuccBB) {
  LLVM_DEBUG(dbgs() << "  Threading through '" << PredBB->getName() << "' and '"
                    << BB->getName() << "'\n");

  BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
  BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());

  BasicBlock *NewBB =
      BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
                         PredBB->getParent(), PredBB);
  NewBB->moveAfter(PredBB);

  // Set the block frequency of NewBB.
  if (HasProfileData) {
    auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
                     BPI->getEdgeProbability(PredPredBB, PredBB);
    BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
  }

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in PredBB, evaluate them
  // to account for entry from PredPredBB.
  DenseMap<Instruction *, Value *> ValueMapping =
      CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);

  // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
  // This eliminates predecessors from PredPredBB, which requires us to simplify
  // any PHI nodes in PredBB.
  Instruction *PredPredTerm = PredPredBB->getTerminator();
  for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
    if (PredPredTerm->getSuccessor(i) == PredBB) {
      PredBB->removePredecessor(PredPredBB, true);
      PredPredTerm->setSuccessor(i, NewBB);
    }

  AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
                                  ValueMapping);
  AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
                                  ValueMapping);

  DTU->applyUpdatesPermissive(
      {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
       {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
       {DominatorTree::Insert, PredPredBB, NewBB},
       {DominatorTree::Delete, PredPredBB, PredBB}});

  UpdateSSA(PredBB, NewBB, ValueMapping);

  // Clean up things like PHI nodes with single operands, dead instructions,
  // etc.
  SimplifyInstructionsInBlock(NewBB, TLI);
  SimplifyInstructionsInBlock(PredBB, TLI);

  SmallVector<BasicBlock *, 1> PredsToFactor;
  PredsToFactor.push_back(NewBB);
  ThreadEdge(BB, PredsToFactor, SuccBB);
}

/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool JumpThreadingPass::TryThreadEdge(
    BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
+59 −0
Original line number Diff line number Diff line
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = global i32 0, align 4

define void @foo(i32 %cond1, i32 %cond2) {
; CHECK-LABEL: @foo
; CHECK-LABEL: entry
entry:
  %tobool = icmp eq i32 %cond1, 0
  br i1 %tobool, label %bb.cond2, label %bb.f1

bb.f1:
  call void @f1()
  br label %bb.cond2
; Verify that we branch on cond2 without checking ptr.
; CHECK:      call void @f1()
; CHECK-NEXT: icmp eq i32 %cond2, 0
; CHECK-NEXT: label %bb.f4, label %bb.f2

bb.cond2:
  %ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ]
  %tobool1 = icmp eq i32 %cond2, 0
  br i1 %tobool1, label %bb.file, label %bb.f2
; Verify that we branch on cond2 without checking ptr.
; CHECK:      icmp eq i32 %cond2, 0
; CHECK-NEXT: label %bb.f3, label %bb.f2

bb.f2:
  call void @f2()
  br label %exit

; Verify that we eliminate this basic block.
; CHECK-NOT: bb.file:
bb.file:
  %cmp = icmp eq i32* %ptr, null
  br i1 %cmp, label %bb.f4, label %bb.f3

bb.f3:
  call void @f3()
  br label %exit

bb.f4:
  call void @f4()
  br label %exit

exit:
  ret void
}

declare void @f1()

declare void @f2()

declare void @f3()

declare void @f4()
+56 −0
Original line number Diff line number Diff line
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @foo(i32 %cond1, i32 %cond2) {
; CHECK-LABEL: @foo
; CHECK-LABEL: entry
entry:
  %tobool = icmp ne i32 %cond1, 0
  br i1 %tobool, label %bb.f1, label %bb.f2

bb.f1:
  call void @f1()
  br label %bb.cond2
; Verify that we branch on cond2 without checking tobool again.
; CHECK:      call void @f1()
; CHECK-NEXT: icmp eq i32 %cond2, 0
; CHECK-NEXT: label %exit, label %bb.f3

bb.f2:
  call void @f2()
  br label %bb.cond2
; Verify that we branch on cond2 without checking tobool again.
; CHECK:      call void @f2()
; CHECK-NEXT: icmp eq i32 %cond2, 0
; CHECK-NEXT: label %exit, label %bb.f4

bb.cond2:
  %tobool1 = icmp eq i32 %cond2, 0
  br i1 %tobool1, label %exit, label %bb.cond1again

; Verify that we eliminate this basic block.
; CHECK-NOT: bb.cond1again:
bb.cond1again:
  br i1 %tobool, label %bb.f3, label %bb.f4

bb.f3:
  call void @f3()
  br label %exit

bb.f4:
  call void @f4()
  br label %exit

exit:
  ret void
}

declare void @f1() local_unnamed_addr

declare void @f2() local_unnamed_addr

declare void @f3() local_unnamed_addr

declare void @f4() local_unnamed_addr