Commit 1b6286b9 authored by Evgeniy Brevnov's avatar Evgeniy Brevnov
Browse files

[LV][NFC] Some refactoring and renaming to facilitate next change.

parent 8232497c
Loading
Loading
Loading
Loading
+80 −69
Original line number Diff line number Diff line
@@ -2687,8 +2687,10 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
                                                         BasicBlock *Bypass) {
  Value *Count = getOrCreateTripCount(L);
  BasicBlock *BB = L->getLoopPreheader();
  IRBuilder<> Builder(BB->getTerminator());
  // Reuse existing vector loop preheader for TC checks.
  // Note that new preheader block is generated for vector loop.
  BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
  IRBuilder<> Builder(TCCheckBlock->getTerminator());

  // Generate code to check if the loop's trip count is less than VF * UF, or
  // equal to it in case a scalar epilogue is required; this implies that the
@@ -2705,48 +2707,55 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
        P, Count, ConstantInt::get(Count->getType(), VF * UF),
        "min.iters.check");

  BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
  // Create new preheader for vector loop.
  LoopVectorPreHeader = TCCheckBlock->splitBasicBlock(
      TCCheckBlock->getTerminator(), "vector.ph");
  // Update dominator tree immediately if the generated block is a
  // LoopBypassBlock because SCEV expansions to generate loop bypass
  // checks may query it before the current function is finished.
  DT->addNewBlock(NewBB, BB);
  DT->addNewBlock(LoopVectorPreHeader, TCCheckBlock);
  if (L->getParentLoop())
    L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
  ReplaceInstWithInst(BB->getTerminator(),
                      BranchInst::Create(Bypass, NewBB, CheckMinIters));
  LoopBypassBlocks.push_back(BB);
    L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI);
  ReplaceInstWithInst(
      TCCheckBlock->getTerminator(),
      BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters));
  LoopBypassBlocks.push_back(TCCheckBlock);
}

void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
  BasicBlock *BB = L->getLoopPreheader();
  // Reuse existing vector loop preheader for SCEV checks.
  // Note that new preheader block is generated for vector loop.
  BasicBlock *const SCEVCheckBlock = LoopVectorPreHeader;

  // Generate the code to check that the SCEV assumptions that we made.
  // We want the new basic block to start at the first instruction in a
  // sequence of instructions that form a check.
  SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
                   "scev.check");
  Value *SCEVCheck =
      Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator());
  Value *SCEVCheck = Exp.expandCodeForPredicate(
      &PSE.getUnionPredicate(), SCEVCheckBlock->getTerminator());

  if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
    if (C->isZero())
      return;

  assert(!BB->getParent()->hasOptSize() &&
  assert(!SCEVCheckBlock->getParent()->hasOptSize() &&
         "Cannot SCEV check stride or overflow when optimizing for size");

  // Create a new block containing the stride check.
  BB->setName("vector.scevcheck");
  auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
  SCEVCheckBlock->setName("vector.scevcheck");
  // Create new preheader for vector loop.
  LoopVectorPreHeader = SCEVCheckBlock->splitBasicBlock(
      SCEVCheckBlock->getTerminator(), "vector.ph");
  // Update dominator tree immediately if the generated block is a
  // LoopBypassBlock because SCEV expansions to generate loop bypass
  // checks may query it before the current function is finished.
  DT->addNewBlock(NewBB, BB);
  DT->addNewBlock(LoopVectorPreHeader, SCEVCheckBlock);
  if (L->getParentLoop())
    L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
  ReplaceInstWithInst(BB->getTerminator(),
                      BranchInst::Create(Bypass, NewBB, SCEVCheck));
  LoopBypassBlocks.push_back(BB);
    L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI);
  ReplaceInstWithInst(
      SCEVCheckBlock->getTerminator(),
      BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck));
  LoopBypassBlocks.push_back(SCEVCheckBlock);
  AddedSafetyChecks = true;
}

@@ -2755,7 +2764,9 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
  if (EnableVPlanNativePath)
    return;

  BasicBlock *BB = L->getLoopPreheader();
  // Reuse existing vector loop preheader for runtime memory checks.
  // Note that new preheader block is generated for vector loop.
  BasicBlock *const MemCheckBlock = L->getLoopPreheader();

  // Generate the code that checks in runtime if arrays overlap. We put the
  // checks into a separate block to make the more common case of few elements
@@ -2763,11 +2774,11 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
  Instruction *FirstCheckInst;
  Instruction *MemRuntimeCheck;
  std::tie(FirstCheckInst, MemRuntimeCheck) =
      Legal->getLAI()->addRuntimeChecks(BB->getTerminator());
      Legal->getLAI()->addRuntimeChecks(MemCheckBlock->getTerminator());
  if (!MemRuntimeCheck)
    return;

  if (BB->getParent()->hasOptSize()) {
  if (MemCheckBlock->getParent()->hasOptSize()) {
    assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
           "Cannot emit memory checks when optimizing for size, unless forced "
           "to vectorize.");
@@ -2781,18 +2792,21 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
    });
  }

  // Create a new block containing the memory check.
  BB->setName("vector.memcheck");
  auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
  MemCheckBlock->setName("vector.memcheck");
  // Create new preheader for vector loop.
  LoopVectorPreHeader = MemCheckBlock->splitBasicBlock(
      MemCheckBlock->getTerminator(), "vector.ph");
  // Update dominator tree immediately if the generated block is a
  // LoopBypassBlock because SCEV expansions to generate loop bypass
  // checks may query it before the current function is finished.
  DT->addNewBlock(NewBB, BB);
  DT->addNewBlock(LoopVectorPreHeader, MemCheckBlock);

  if (L->getParentLoop())
    L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
  ReplaceInstWithInst(BB->getTerminator(),
                      BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
  LoopBypassBlocks.push_back(BB);
    L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI);
  ReplaceInstWithInst(
      MemCheckBlock->getTerminator(),
      BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck));
  LoopBypassBlocks.push_back(MemCheckBlock);
  AddedSafetyChecks = true;

  // We currently don't use LoopVersioning for the actual loop cloning but we
@@ -2923,12 +2937,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
   ...
   */

  BasicBlock *OldBasicBlock = OrigLoop->getHeader();
  BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
  BasicBlock *ExitBlock = OrigLoop->getExitBlock();
  MDNode *OrigLoopID = OrigLoop->getLoopID();
  assert(VectorPH && "Invalid loop structure");
  assert(ExitBlock && "Must have an exit block");

  // Some loops have a single integer induction variable, while other loops
  // don't. One example is c++ iterators that often have multiple pointer
@@ -2945,12 +2954,18 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
  Type *IdxTy = Legal->getWidestInductionType();

  // Split the single block loop into the two loop structure described above.
  BasicBlock *VecBody =
      VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
  BasicBlock *MiddleBlock =
      VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
  BasicBlock *ScalarPH =
      MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
  LoopScalarBody = OrigLoop->getHeader();
  LoopVectorPreHeader = OrigLoop->getLoopPreheader();
  LoopExitBlock = OrigLoop->getExitBlock();
  assert(LoopVectorPreHeader && "Invalid loop structure");
  assert(LoopExitBlock && "Must have an exit block");

  LoopVectorBody = LoopVectorPreHeader->splitBasicBlock(
      LoopVectorPreHeader->getTerminator(), "vector.body");
  LoopMiddleBlock = LoopVectorBody->splitBasicBlock(
      LoopVectorBody->getTerminator(), "middle.block");
  LoopScalarPreHeader = LoopMiddleBlock->splitBasicBlock(
      LoopMiddleBlock->getTerminator(), "scalar.ph");

  // Create and register the new vector loop.
  Loop *Lp = LI->AllocateLoop();
@@ -2960,12 +2975,12 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
  // before calling any utilities such as SCEV that require valid LoopInfo.
  if (ParentLoop) {
    ParentLoop->addChildLoop(Lp);
    ParentLoop->addBasicBlockToLoop(ScalarPH, *LI);
    ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI);
    ParentLoop->addBasicBlockToLoop(LoopScalarPreHeader, *LI);
    ParentLoop->addBasicBlockToLoop(LoopMiddleBlock, *LI);
  } else {
    LI->addTopLevelLoop(Lp);
  }
  Lp->addBasicBlockToLoop(VecBody, *LI);
  Lp->addBasicBlockToLoop(LoopVectorBody, *LI);

  // Find the loop boundaries.
  Value *Count = getOrCreateTripCount(Lp);
@@ -2977,16 +2992,16 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
  // backedge-taken count is uint##_max: adding one to it will overflow leading
  // to an incorrect trip count of zero. In this (rare) case we will also jump
  // to the scalar loop.
  emitMinimumIterationCountCheck(Lp, ScalarPH);
  emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);

  // Generate the code to check any assumptions that we've made for SCEV
  // expressions.
  emitSCEVChecks(Lp, ScalarPH);
  emitSCEVChecks(Lp, LoopScalarPreHeader);

  // Generate the code that checks in runtime if arrays overlap. We put the
  // checks into a separate block to make the more common case of few elements
  // faster.
  emitMemRuntimeChecks(Lp, ScalarPH);
  emitMemRuntimeChecks(Lp, LoopScalarPreHeader);

  // Generate the induction variable.
  // The loop step is equal to the vectorization factor (num of SIMD elements)
@@ -3014,8 +3029,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
    InductionDescriptor II = InductionEntry.second;

    // Create phi nodes to merge from the  backedge-taken check block.
    PHINode *BCResumeVal = PHINode::Create(
        OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
    PHINode *BCResumeVal =
        PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
                        LoopScalarPreHeader->getTerminator());
    // Copy original phi DL over to the new one.
    BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
    Value *&EndValue = IVEndValues[OrigPhi];
@@ -3028,21 +3044,21 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
      Instruction::CastOps CastOp =
          CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
      Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
      const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
      const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
      EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
      EndValue->setName("ind.end");
    }

    // The new PHI merges the original incoming value, in case of a bypass,
    // or the value at the end of the vectorized loop.
    BCResumeVal->addIncoming(EndValue, MiddleBlock);
    BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);

    // Fix the scalar body counter (PHI node).
    // The old induction's phi node in the scalar body needs the truncated
    // value.
    for (BasicBlock *BB : LoopBypassBlocks)
      BCResumeVal->addIncoming(II.getStartValue(), BB);
    OrigPhi->setIncomingValueForBlock(ScalarPH, BCResumeVal);
    OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
  }

  // We need the OrigLoop (scalar loop part) latch terminator to help
@@ -3060,9 +3076,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
  // If tail is to be folded, we know we don't need to run the remainder.
  Value *CmpN = Builder.getTrue();
  if (!Cost->foldTailByMasking()) {
    CmpN =
        CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
                        CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
    CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
                           CountRoundDown, "cmp.n",
                           LoopMiddleBlock->getTerminator());

    // Here we use the same DebugLoc as the scalar loop latch branch instead
    // of the corresponding compare because they may have ended up with
@@ -3071,20 +3087,15 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
    cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
  }

  BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
  BranchInst *BrInst =
      BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, CmpN);
  BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
  ReplaceInstWithInst(MiddleBlock->getTerminator(), BrInst);
  ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);

  // Get ready to start creating new instructions into the vectorized body.
  Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());

  // Save the state.
  LoopVectorPreHeader = Lp->getLoopPreheader();
  LoopScalarPreHeader = ScalarPH;
  LoopMiddleBlock = MiddleBlock;
  LoopExitBlock = ExitBlock;
  LoopVectorBody = VecBody;
  LoopScalarBody = OldBasicBlock;
  assert(LoopVectorPreHeader == Lp->getLoopPreheader() &&
         "Inconsistent vector loop preheader");
  Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());

  Optional<MDNode *> VectorizedLoopID =
      makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,