Commit 100e797a authored by Gil Rapaport's avatar Gil Rapaport
Browse files

[LV] Apply sink-after & interleave-groups as VPlan transformations (NFC)

This recommits 2be17087 (reverted in
d3ec06d2 for heap-use-after-free) with a fix
in IAI's reset() which was not clearing the set of interleave groups after
deleting them.
parent 95a25d88
Loading
Loading
Loading
Loading
+3 −6
Original line number Diff line number Diff line
@@ -542,13 +542,10 @@ public:
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks.
  void reset() {
    SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
  }

+7 −2
Original line number Diff line number Diff line
@@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
  /// The profitability analysis.
  LoopVectorizationCostModel &CM;

  /// The interleaved access analysis.
  InterleavedAccessInfo &IAI;

  SmallVector<VPlanPtr, 4> VPlans;

  /// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -223,8 +226,10 @@ public:
  LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                           const TargetTransformInfo *TTI,
                           LoopVectorizationLegality *Legal,
                           LoopVectorizationCostModel &CM)
      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
                           LoopVectorizationCostModel &CM,
                           InterleavedAccessInfo &IAI)
      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
        IAI(IAI) {}

  /// Plan how to best vectorize, return the best VF and its cost, or None if
  /// vectorization and interleaving should be avoided up front.
+93 −110
Original line number Diff line number Diff line
@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
  return BlockMaskCache[BB] = BlockMask;
}

VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
                                                           VFRange &Range,
                                                           VPlanPtr &Plan) {
  const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
  if (!IG)
    return nullptr;

  // Now check if IG is relevant for VF's in the given range.
  auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
    return [=](unsigned VF) -> bool {
      return (VF >= 2 && // Query is illegal for VF == 1
              CM.getWideningDecision(I, VF) ==
                  LoopVectorizationCostModel::CM_Interleave);
    };
  };
  if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
    return nullptr;

  // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
  // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
  // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
  assert(I == IG->getInsertPos() &&
         "Generating a recipe for an adjunct member of an interleave group");

  VPValue *Mask = nullptr;
  if (Legal->isMaskRequired(I))
    Mask = createBlockInMask(I->getParent(), Plan);

  return new VPInterleaveRecipe(IG, Mask);
}

VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
                                  VPlanPtr &Plan) {
@@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
        CM.getWideningDecision(I, VF);
    assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
           "CM decision should be taken at this point.");
    assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
           "Interleave memory opportunity should be caught earlier.");
    return Decision != LoopVectorizationCostModel::CM_Scalarize;
  };

@@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
  if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
    return false;

  // If this ingredient's recipe is to be recorded, keep its recipe a singleton
  // to avoid having to split recipes later.
  bool IsSingleton = Ingredient2Recipe.count(I);

  // Success: widen this instruction. We optimize the common case where
  // consecutive instructions can be represented by a single recipe.
  if (!VPBB->empty()) {
    VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
    if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
  if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
      LastExtensibleRecipe->appendInstruction(I))
    return true;
  }

  VPBB->appendRecipe(new VPWidenRecipe(I));
  VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
  if (!IsSingleton)
    LastExtensibleRecipe = WidenRecipe;
  setRecipe(I, WidenRecipe);
  VPBB->appendRecipe(WidenRecipe);
  return true;
}

@@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
      [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);

  auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
  setRecipe(I, Recipe);

  // Find if I uses a predicated instruction. If so, it will use its scalar
  // value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
                                        VPlanPtr &Plan, VPBasicBlock *VPBB) {
  VPRecipeBase *Recipe = nullptr;
  // Check if Instr should belong to an interleave memory recipe, or already
  // does. In the latter case Instr is irrelevant.
  if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
    VPBB->appendRecipe(Recipe);
    return true;
  }

  // Check if Instr is a memory operation that should be widened.
  if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
  // First, check for specific widening recipes that deal with memory
  // operations, inductions and Phi nodes.
  if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
      (Recipe = tryToOptimizeInduction(Instr, Range)) ||
      (Recipe = tryToBlend(Instr, Plan)) ||
      (isa<PHINode>(Instr) &&
       (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
    setRecipe(Instr, Recipe);
    VPBB->appendRecipe(Recipe);
    return true;
  }

  // Check if Instr should form some PHI recipe.
  if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
    VPBB->appendRecipe(Recipe);
    return true;
  }
  if ((Recipe = tryToBlend(Instr, Plan))) {
    VPBB->appendRecipe(Recipe);
    return true;
  }
  if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
    VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
    return true;
  }

  // Check if Instr is to be widened by a general VPWidenRecipe, after
  // having first checked for specific widening recipes that deal with
  // Interleave Groups, Inductions and Phi nodes.
  // Check if Instr is to be widened by a general VPWidenRecipe.
  if (tryToWiden(Instr, VPBB, Range))
    return true;

@@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
    VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
    SmallPtrSetImpl<Instruction *> &DeadInstructions) {

  // Hold a mapping from predicated instructions to their recipes, in order to
  // fix their AlsoPack behavior if a user is determined to replicate and use a
  // scalar instead of vector value.
  DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;

  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
  DenseMap<Instruction *, Instruction *> SinkAfterInverse;

  SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;

  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);

  // ---------------------------------------------------------------------------
  // Pre-construction: record ingredients whose recipes we'll need to further
  // process after constructing the initial VPlan.
  // ---------------------------------------------------------------------------

  // Mark instructions we'll need to sink later and their targets as
  // ingredients whose recipe we'll need to record.
  for (auto &Entry : SinkAfter) {
    RecipeBuilder.recordRecipeOf(Entry.first);
    RecipeBuilder.recordRecipeOf(Entry.second);
  }

  // For each interleave group which is relevant for this (possibly trimmed)
  // Range, add it to the set of groups to be later applied to the VPlan and add
  // placeholders for its members' Recipes which we'll be replacing with a
  // single VPInterleaveRecipe.
  for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
    auto applyIG = [IG, this](unsigned VF) -> bool {
      return (VF >= 2 && // Query is illegal for VF == 1
              CM.getWideningDecision(IG->getInsertPos(), VF) ==
                  LoopVectorizationCostModel::CM_Interleave);
    };
    if (!getDecisionAndClampRange(applyIG, Range))
      continue;
    InterleaveGroups.insert(IG);
    for (unsigned i = 0; i < IG->getFactor(); i++)
      if (Instruction *Member = IG->getMember(i))
        RecipeBuilder.recordRecipeOf(Member);
  };

  // ---------------------------------------------------------------------------
  // Build initial VPlan: Scan the body of the loop in a topological order to
  // visit each basic block after having visited its predecessor basic blocks.
  // ---------------------------------------------------------------------------

  // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
  VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
  auto Plan = std::make_unique<VPlan>(VPBB);

  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
  // Represent values that will have defs inside VPlan.
  for (Value *V : NeedDef)
    Plan->addVPValue(V);
@@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(

    std::vector<Instruction *> Ingredients;

    // Organize the ingredients to vectorize from current basic block in the
    // right order.
    // Introduce each ingredient into VPlan.
    for (Instruction &I : BB->instructionsWithoutDebug()) {
      Instruction *Instr = &I;

@@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
          DeadInstructions.find(Instr) != DeadInstructions.end())
        continue;

      // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
      // member of the IG, do not construct any Recipe for it.
      const InterleaveGroup<Instruction> *IG =
          CM.getInterleavedAccessGroup(Instr);
      if (IG && Instr != IG->getInsertPos() &&
          Range.Start >= 2 && // Query is illegal for VF == 1
          CM.getWideningDecision(Instr, Range.Start) ==
              LoopVectorizationCostModel::CM_Interleave) {
        auto SinkCandidate = SinkAfterInverse.find(Instr);
        if (SinkCandidate != SinkAfterInverse.end())
          Ingredients.push_back(SinkCandidate->second);
        continue;
      }

      // Move instructions to handle first-order recurrences, step 1: avoid
      // handling this instruction until after we've handled the instruction it
      // should follow.
      auto SAIt = SinkAfter.find(Instr);
      if (SAIt != SinkAfter.end()) {
        LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
                          << *SAIt->second
                          << " to vectorize a 1st order recurrence.\n");
        SinkAfterInverse[SAIt->second] = Instr;
        continue;
      }

      Ingredients.push_back(Instr);

      // Move instructions to handle first-order recurrences, step 2: push the
      // instruction to be sunk at its insertion point.
      auto SAInvIt = SinkAfterInverse.find(Instr);
      if (SAInvIt != SinkAfterInverse.end())
        Ingredients.push_back(SAInvIt->second);
    }

    // Introduce each ingredient into VPlan.
    for (Instruction *Instr : Ingredients) {
      if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
        continue;

@@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
  VPBlockUtils::disconnectBlocks(PreEntry, Entry);
  delete PreEntry;

  // ---------------------------------------------------------------------------
  // Transform initial VPlan: Apply previously taken decisions, in order, to
  // bring the VPlan to its final state.
  // ---------------------------------------------------------------------------

  // Apply Sink-After legal constraints.
  for (auto &Entry : SinkAfter) {
    VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
    VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
    Sink->moveAfter(Target);
  }

  // Interleave memory: for each Interleave Group we marked earlier as relevant
  // for this VPlan, replace the Recipes widening its memory instructions with a
  // single VPInterleaveRecipe at its insertion point.
  for (auto IG : InterleaveGroups) {
    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
        RecipeBuilder.getRecipe(IG->getInsertPos()));
    (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);

    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *Member = IG->getMember(i)) {
        RecipeBuilder.getRecipe(Member)->eraseFromParent();
      }
  }

  // Finally, if tail is folded by masking, introduce selects between the phi
  // and the live-out instruction of each reduction, at the end of the latch.
  if (CM.foldTailByMasking()) {
@@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
  if (!User)
  VPValue *Mask = getMask();
  if (!Mask)
    return State.ILV->vectorizeMemoryInstruction(&Instr);

  // Last (and currently only) operand is a mask.
  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    MaskValues[Part] = State.get(Mask, Part);
  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
@@ -7473,7 +7456,7 @@ static bool processLoopInVPlanNativePath(
  // Use the planner for outer loop vectorization.
  // TODO: CM is not used at this point inside the planner. Turn CM into an
  // optional argument if we don't need it in the future.
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);

  // Get user vectorization factor.
  const unsigned UserVF = Hints.getWidth();
@@ -7631,7 +7614,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
  CM.collectValuesToIgnore();

  // Use the planner for vectorization.
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);

  // Get user vectorization factor.
  unsigned UserVF = Hints.getWidth();
+34 −10
Original line number Diff line number Diff line
@@ -47,6 +47,24 @@ class VPRecipeBuilder {
  EdgeMaskCacheTy EdgeMaskCache;
  BlockMaskCacheTy BlockMaskCache;

  // VPlan-VPlan transformations support: Hold a mapping from ingredients to
  // their recipe. To save on memory, only do so for selected ingredients,
  // marked by having a nullptr entry in this map. If those ingredients get a
  // VPWidenRecipe, also avoid compressing other ingredients into it to avoid
  // having to split such recipes later.
  DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
  VPWidenRecipe *LastExtensibleRecipe = nullptr;

  /// Set the recipe created for given ingredient. This operation is a no-op for
  /// ingredients that were not marked using a nullptr entry in the map.
  void setRecipe(Instruction *I, VPRecipeBase *R) {
    if (!Ingredient2Recipe.count(I))
      return;
    assert(Ingredient2Recipe[I] == nullptr &&
           "Recipe already set for ingredient");
    Ingredient2Recipe[I] = R;
  }

public:
  /// A helper function that computes the predicate of the block BB, assuming
  /// that the header block of the loop is set to True. It returns the *entry*
@@ -57,16 +75,22 @@ public:
  /// and DST.
  VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);

  /// Check if \I belongs to an Interleave Group within the given VF \p Range,
  /// \return true in the first returned value if so and false otherwise.
  /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
  /// for \p Range.Start, and provide it as the second returned value.
  /// Note that if \I is an adjunct member of an IG for \p Range.Start, the
  /// \return value is <true, nullptr>, as it is handled by another recipe.
  /// \p Range.End may be decreased to ensure same decision from \p Range.Start
  /// to \p Range.End.
  VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
                                            VPlanPtr &Plan);
  /// Mark given ingredient for recording its recipe once one is created for
  /// it.
  void recordRecipeOf(Instruction *I) {
    assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) &&
           "Recipe already set for ingredient");
    Ingredient2Recipe[I] = nullptr;
  }

  /// Return the recipe created for given ingredient.
  VPRecipeBase *getRecipe(Instruction *I) {
    assert(Ingredient2Recipe.count(I) &&
           "Recording this ingredients recipe was not requested");
    assert(Ingredient2Recipe[I] != nullptr &&
           "Ingredient doesn't have a recipe");
    return Ingredient2Recipe[I];
  }

  /// Check if \I is a memory instruction to be widened for \p Range.Start and
  /// potentially masked. Such instructions are handled by a recipe that takes
+20 −3
Original line number Diff line number Diff line
@@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  InsertPos->getParent()->getRecipeList().splice(
      std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
      getIterator());
  removeFromParent();
  insertAfter(InsertPos);
}

void VPInstruction::generateInstruction(VPTransformState &State,
Loading