Loading llvm/include/llvm/Analysis/VectorUtils.h +3 −6 Original line number Diff line number Diff line Loading @@ -542,13 +542,10 @@ public: /// formation for predicated accesses, we may be able to relax this limitation /// in the future once we handle more complicated blocks. void reset() { SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); for (auto *Ptr : DelSet) delete Ptr; InterleaveGroupMap.clear(); for (auto *Ptr : InterleaveGroups) delete Ptr; InterleaveGroups.clear(); RequiresScalarEpilogue = false; } Loading llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +7 −2 Original line number Diff line number Diff line Loading @@ -201,6 +201,9 @@ class LoopVectorizationPlanner { /// The profitability analysis. LoopVectorizationCostModel &CM; /// The interleaved access analysis. InterleavedAccessInfo &IAI; SmallVector<VPlanPtr, 4> VPlans; /// This class is used to enable the VPlan to invoke a method of ILV. This is Loading @@ -223,8 +226,10 @@ public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM) : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI) : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI) {} /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. Loading llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +93 −110 Original line number Diff line number Diff line Loading @@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; // Now check if IG is relevant for VF's in the given range. auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> { return [=](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Interleave); }; }; if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range)) return nullptr; // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) // range. If it's the primary member of the IG construct a VPInterleaveRecipe. // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. assert(I == IG->getInsertPos() && "Generating a recipe for an adjunct member of an interleave group"); VPValue *Mask = nullptr; if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); return new VPInterleaveRecipe(IG, Mask); } VPWidenMemoryInstructionRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { Loading @@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); assert(Decision != LoopVectorizationCostModel::CM_Interleave && "Interleave memory opportunity should be caught earlier."); return Decision != LoopVectorizationCostModel::CM_Scalarize; }; Loading Loading @@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return false; // If this ingredient's recipe is to be recorded, keep its recipe a singleton // to avoid having to split recipes later. bool IsSingleton = Ingredient2Recipe.count(I); // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. if (!VPBB->empty()) { VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back()); if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() && LastExtensibleRecipe->appendInstruction(I)) return true; } VPBB->appendRecipe(new VPWidenRecipe(I)); VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I); if (!IsSingleton) LastExtensibleRecipe = WidenRecipe; setRecipe(I, WidenRecipe); VPBB->appendRecipe(WidenRecipe); return true; } Loading @@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); setRecipe(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into Loading Loading @@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan, VPBasicBlock *VPBB) { VPRecipeBase *Recipe = nullptr; // Check if Instr should belong to an interleave memory recipe, or already // does. In the latter case Instr is irrelevant. if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) { VPBB->appendRecipe(Recipe); return true; } // Check if Instr is a memory operation that should be widened. if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { // First, check for specific widening recipes that deal with memory // operations, inductions and Phi nodes. if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) || (Recipe = tryToOptimizeInduction(Instr, Range)) || (Recipe = tryToBlend(Instr, Plan)) || (isa<PHINode>(Instr) && (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) { setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); return true; } // Check if Instr should form some PHI recipe. if ((Recipe = tryToOptimizeInduction(Instr, Range))) { VPBB->appendRecipe(Recipe); return true; } if ((Recipe = tryToBlend(Instr, Plan))) { VPBB->appendRecipe(Recipe); return true; } if (PHINode *Phi = dyn_cast<PHINode>(Instr)) { VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); return true; } // Check if Instr is to be widened by a general VPWidenRecipe, after // having first checked for specific widening recipes that deal with // Interleave Groups, Inductions and Phi nodes. // Check if Instr is to be widened by a general VPWidenRecipe. if (tryToWiden(Instr, VPBB, Range)) return true; Loading Loading @@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef, SmallPtrSetImpl<Instruction *> &DeadInstructions) { // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe; DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter(); DenseMap<Instruction *, Instruction *> SinkAfterInverse; SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // --------------------------------------------------------------------------- // Pre-construction: record ingredients whose recipes we'll need to further // process after constructing the initial VPlan. // --------------------------------------------------------------------------- // Mark instructions we'll need to sink later and their targets as // ingredients whose recipe we'll need to record. for (auto &Entry : SinkAfter) { RecipeBuilder.recordRecipeOf(Entry.first); RecipeBuilder.recordRecipeOf(Entry.second); } // For each interleave group which is relevant for this (possibly trimmed) // Range, add it to the set of groups to be later applied to the VPlan and add // placeholders for its members' Recipes which we'll be replacing with a // single VPInterleaveRecipe. for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) { auto applyIG = [IG, this](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(IG->getInsertPos(), VF) == LoopVectorizationCostModel::CM_Interleave); }; if (!getDecisionAndClampRange(applyIG, Range)) continue; InterleaveGroups.insert(IG); for (unsigned i = 0; i < IG->getFactor(); i++) if (Instruction *Member = IG->getMember(i)) RecipeBuilder.recordRecipeOf(Member); }; // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = std::make_unique<VPlan>(VPBB); VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); Loading @@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( std::vector<Instruction *> Ingredients; // Organize the ingredients to vectorize from current basic block in the // right order. // Introduce each ingredient into VPlan. for (Instruction &I : BB->instructionsWithoutDebug()) { Instruction *Instr = &I; Loading @@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( DeadInstructions.find(Instr) != DeadInstructions.end()) continue; // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == LoopVectorizationCostModel::CM_Interleave) { auto SinkCandidate = SinkAfterInverse.find(Instr); if (SinkCandidate != SinkAfterInverse.end()) Ingredients.push_back(SinkCandidate->second); continue; } // Move instructions to handle first-order recurrences, step 1: avoid // handling this instruction until after we've handled the instruction it // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } Ingredients.push_back(Instr); // Move instructions to handle first-order recurrences, step 2: push the // instruction to be sunk at its insertion point. auto SAInvIt = SinkAfterInverse.find(Instr); if (SAInvIt != SinkAfterInverse.end()) Ingredients.push_back(SAInvIt->second); } // Introduce each ingredient into VPlan. for (Instruction *Instr : Ingredients) { if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB)) continue; Loading @@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. // --------------------------------------------------------------------------- // Apply Sink-After legal constraints. for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); Sink->moveAfter(Target); } // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. for (auto IG : InterleaveGroups) { auto *Recipe = cast<VPWidenMemoryInstructionRecipe>( RecipeBuilder.getRecipe(IG->getInsertPos())); (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe); for (unsigned i = 0; i < IG->getFactor(); ++i) if (Instruction *Member = IG->getMember(i)) { RecipeBuilder.getRecipe(Member)->eraseFromParent(); } } // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. if (CM.foldTailByMasking()) { Loading Loading @@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { } void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { if (!User) VPValue *Mask = getMask(); if (!Mask) return State.ILV->vectorizeMemoryInstruction(&Instr); // Last (and currently only) operand is a mask. InnerLoopVectorizer::VectorParts MaskValues(State.UF); VPValue *Mask = User->getOperand(User->getNumOperands() - 1); for (unsigned Part = 0; Part < State.UF; ++Part) MaskValues[Part] = State.get(Mask, Part); State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); Loading Loading @@ -7473,7 +7456,7 @@ static bool processLoopInVPlanNativePath( // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM); LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI); // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); Loading Loading @@ -7631,7 +7614,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectValuesToIgnore(); // Use the planner for vectorization. LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM); LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI); // Get user vectorization factor. unsigned UserVF = Hints.getWidth(); Loading llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +34 −10 Original line number Diff line number Diff line Loading @@ -47,6 +47,24 @@ class VPRecipeBuilder { EdgeMaskCacheTy EdgeMaskCache; BlockMaskCacheTy BlockMaskCache; // VPlan-VPlan transformations support: Hold a mapping from ingredients to // their recipe. To save on memory, only do so for selected ingredients, // marked by having a nullptr entry in this map. If those ingredients get a // VPWidenRecipe, also avoid compressing other ingredients into it to avoid // having to split such recipes later. DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; VPWidenRecipe *LastExtensibleRecipe = nullptr; /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. void setRecipe(Instruction *I, VPRecipeBase *R) { if (!Ingredient2Recipe.count(I)) return; assert(Ingredient2Recipe[I] == nullptr && "Recipe already set for ingredient"); Ingredient2Recipe[I] = R; } public: /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* Loading @@ -57,16 +75,22 @@ public: /// and DST. VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); /// Check if \I belongs to an Interleave Group within the given VF \p Range, /// \return true in the first returned value if so and false otherwise. /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG /// for \p Range.Start, and provide it as the second returned value. /// Note that if \I is an adjunct member of an IG for \p Range.Start, the /// \return value is <true, nullptr>, as it is handled by another recipe. /// \p Range.End may be decreased to ensure same decision from \p Range.Start /// to \p Range.End. VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); /// Mark given ingredient for recording its recipe once one is created for /// it. void recordRecipeOf(Instruction *I) { assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) && "Recipe already set for ingredient"); Ingredient2Recipe[I] = nullptr; } /// Return the recipe created for given ingredient. VPRecipeBase *getRecipe(Instruction *I) { assert(Ingredient2Recipe.count(I) && "Recording this ingredients recipe was not requested"); assert(Ingredient2Recipe[I] != nullptr && "Ingredient doesn't have a recipe"); return Ingredient2Recipe[I]; } /// Check if \I is a memory instruction to be widened for \p Range.Start and /// potentially masked. Such instructions are handled by a recipe that takes Loading llvm/lib/Transforms/Vectorize/VPlan.cpp +20 −3 Original line number Diff line number Diff line Loading @@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) { } void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insert(InsertPos->getIterator(), this); } void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); } void VPRecipeBase::removeFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); getParent()->getRecipeList().remove(getIterator()); Parent = nullptr; } iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); return getParent()->getRecipeList().erase(getIterator()); } void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { InsertPos->getParent()->getRecipeList().splice( std::next(InsertPos->getIterator()), getParent()->getRecipeList(), getIterator()); removeFromParent(); insertAfter(InsertPos); } void VPInstruction::generateInstruction(VPTransformState &State, Loading Loading
llvm/include/llvm/Analysis/VectorUtils.h +3 −6 Original line number Diff line number Diff line Loading @@ -542,13 +542,10 @@ public: /// formation for predicated accesses, we may be able to relax this limitation /// in the future once we handle more complicated blocks. void reset() { SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); for (auto *Ptr : DelSet) delete Ptr; InterleaveGroupMap.clear(); for (auto *Ptr : InterleaveGroups) delete Ptr; InterleaveGroups.clear(); RequiresScalarEpilogue = false; } Loading
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +7 −2 Original line number Diff line number Diff line Loading @@ -201,6 +201,9 @@ class LoopVectorizationPlanner { /// The profitability analysis. LoopVectorizationCostModel &CM; /// The interleaved access analysis. InterleavedAccessInfo &IAI; SmallVector<VPlanPtr, 4> VPlans; /// This class is used to enable the VPlan to invoke a method of ILV. This is Loading @@ -223,8 +226,10 @@ public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM) : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI) : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI) {} /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. Loading
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +93 −110 Original line number Diff line number Diff line Loading @@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; // Now check if IG is relevant for VF's in the given range. auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> { return [=](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(I, VF) == LoopVectorizationCostModel::CM_Interleave); }; }; if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range)) return nullptr; // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) // range. If it's the primary member of the IG construct a VPInterleaveRecipe. // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. assert(I == IG->getInsertPos() && "Generating a recipe for an adjunct member of an interleave group"); VPValue *Mask = nullptr; if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); return new VPInterleaveRecipe(IG, Mask); } VPWidenMemoryInstructionRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { Loading @@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); assert(Decision != LoopVectorizationCostModel::CM_Interleave && "Interleave memory opportunity should be caught earlier."); return Decision != LoopVectorizationCostModel::CM_Scalarize; }; Loading Loading @@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return false; // If this ingredient's recipe is to be recorded, keep its recipe a singleton // to avoid having to split recipes later. bool IsSingleton = Ingredient2Recipe.count(I); // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. if (!VPBB->empty()) { VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back()); if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() && LastExtensibleRecipe->appendInstruction(I)) return true; } VPBB->appendRecipe(new VPWidenRecipe(I)); VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I); if (!IsSingleton) LastExtensibleRecipe = WidenRecipe; setRecipe(I, WidenRecipe); VPBB->appendRecipe(WidenRecipe); return true; } Loading @@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); setRecipe(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into Loading Loading @@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan, VPBasicBlock *VPBB) { VPRecipeBase *Recipe = nullptr; // Check if Instr should belong to an interleave memory recipe, or already // does. In the latter case Instr is irrelevant. if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) { VPBB->appendRecipe(Recipe); return true; } // Check if Instr is a memory operation that should be widened. if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { // First, check for specific widening recipes that deal with memory // operations, inductions and Phi nodes. if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) || (Recipe = tryToOptimizeInduction(Instr, Range)) || (Recipe = tryToBlend(Instr, Plan)) || (isa<PHINode>(Instr) && (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) { setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); return true; } // Check if Instr should form some PHI recipe. if ((Recipe = tryToOptimizeInduction(Instr, Range))) { VPBB->appendRecipe(Recipe); return true; } if ((Recipe = tryToBlend(Instr, Plan))) { VPBB->appendRecipe(Recipe); return true; } if (PHINode *Phi = dyn_cast<PHINode>(Instr)) { VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); return true; } // Check if Instr is to be widened by a general VPWidenRecipe, after // having first checked for specific widening recipes that deal with // Interleave Groups, Inductions and Phi nodes. // Check if Instr is to be widened by a general VPWidenRecipe. if (tryToWiden(Instr, VPBB, Range)) return true; Loading Loading @@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef, SmallPtrSetImpl<Instruction *> &DeadInstructions) { // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe; DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter(); DenseMap<Instruction *, Instruction *> SinkAfterInverse; SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // --------------------------------------------------------------------------- // Pre-construction: record ingredients whose recipes we'll need to further // process after constructing the initial VPlan. // --------------------------------------------------------------------------- // Mark instructions we'll need to sink later and their targets as // ingredients whose recipe we'll need to record. for (auto &Entry : SinkAfter) { RecipeBuilder.recordRecipeOf(Entry.first); RecipeBuilder.recordRecipeOf(Entry.second); } // For each interleave group which is relevant for this (possibly trimmed) // Range, add it to the set of groups to be later applied to the VPlan and add // placeholders for its members' Recipes which we'll be replacing with a // single VPInterleaveRecipe. for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) { auto applyIG = [IG, this](unsigned VF) -> bool { return (VF >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(IG->getInsertPos(), VF) == LoopVectorizationCostModel::CM_Interleave); }; if (!getDecisionAndClampRange(applyIG, Range)) continue; InterleaveGroups.insert(IG); for (unsigned i = 0; i < IG->getFactor(); i++) if (Instruction *Member = IG->getMember(i)) RecipeBuilder.recordRecipeOf(Member); }; // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = std::make_unique<VPlan>(VPBB); VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); Loading @@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( std::vector<Instruction *> Ingredients; // Organize the ingredients to vectorize from current basic block in the // right order. // Introduce each ingredient into VPlan. for (Instruction &I : BB->instructionsWithoutDebug()) { Instruction *Instr = &I; Loading @@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( DeadInstructions.find(Instr) != DeadInstructions.end()) continue; // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == LoopVectorizationCostModel::CM_Interleave) { auto SinkCandidate = SinkAfterInverse.find(Instr); if (SinkCandidate != SinkAfterInverse.end()) Ingredients.push_back(SinkCandidate->second); continue; } // Move instructions to handle first-order recurrences, step 1: avoid // handling this instruction until after we've handled the instruction it // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } Ingredients.push_back(Instr); // Move instructions to handle first-order recurrences, step 2: push the // instruction to be sunk at its insertion point. auto SAInvIt = SinkAfterInverse.find(Instr); if (SAInvIt != SinkAfterInverse.end()) Ingredients.push_back(SAInvIt->second); } // Introduce each ingredient into VPlan. for (Instruction *Instr : Ingredients) { if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB)) continue; Loading @@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. // --------------------------------------------------------------------------- // Apply Sink-After legal constraints. for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); Sink->moveAfter(Target); } // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. for (auto IG : InterleaveGroups) { auto *Recipe = cast<VPWidenMemoryInstructionRecipe>( RecipeBuilder.getRecipe(IG->getInsertPos())); (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe); for (unsigned i = 0; i < IG->getFactor(); ++i) if (Instruction *Member = IG->getMember(i)) { RecipeBuilder.getRecipe(Member)->eraseFromParent(); } } // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. if (CM.foldTailByMasking()) { Loading Loading @@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { } void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { if (!User) VPValue *Mask = getMask(); if (!Mask) return State.ILV->vectorizeMemoryInstruction(&Instr); // Last (and currently only) operand is a mask. InnerLoopVectorizer::VectorParts MaskValues(State.UF); VPValue *Mask = User->getOperand(User->getNumOperands() - 1); for (unsigned Part = 0; Part < State.UF; ++Part) MaskValues[Part] = State.get(Mask, Part); State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); Loading Loading @@ -7473,7 +7456,7 @@ static bool processLoopInVPlanNativePath( // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM); LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI); // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); Loading Loading @@ -7631,7 +7614,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectValuesToIgnore(); // Use the planner for vectorization. LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM); LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI); // Get user vectorization factor. unsigned UserVF = Hints.getWidth(); Loading
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +34 −10 Original line number Diff line number Diff line Loading @@ -47,6 +47,24 @@ class VPRecipeBuilder { EdgeMaskCacheTy EdgeMaskCache; BlockMaskCacheTy BlockMaskCache; // VPlan-VPlan transformations support: Hold a mapping from ingredients to // their recipe. To save on memory, only do so for selected ingredients, // marked by having a nullptr entry in this map. If those ingredients get a // VPWidenRecipe, also avoid compressing other ingredients into it to avoid // having to split such recipes later. DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; VPWidenRecipe *LastExtensibleRecipe = nullptr; /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. void setRecipe(Instruction *I, VPRecipeBase *R) { if (!Ingredient2Recipe.count(I)) return; assert(Ingredient2Recipe[I] == nullptr && "Recipe already set for ingredient"); Ingredient2Recipe[I] = R; } public: /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* Loading @@ -57,16 +75,22 @@ public: /// and DST. VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); /// Check if \I belongs to an Interleave Group within the given VF \p Range, /// \return true in the first returned value if so and false otherwise. /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG /// for \p Range.Start, and provide it as the second returned value. /// Note that if \I is an adjunct member of an IG for \p Range.Start, the /// \return value is <true, nullptr>, as it is handled by another recipe. /// \p Range.End may be decreased to ensure same decision from \p Range.Start /// to \p Range.End. VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); /// Mark given ingredient for recording its recipe once one is created for /// it. void recordRecipeOf(Instruction *I) { assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) && "Recipe already set for ingredient"); Ingredient2Recipe[I] = nullptr; } /// Return the recipe created for given ingredient. VPRecipeBase *getRecipe(Instruction *I) { assert(Ingredient2Recipe.count(I) && "Recording this ingredients recipe was not requested"); assert(Ingredient2Recipe[I] != nullptr && "Ingredient doesn't have a recipe"); return Ingredient2Recipe[I]; } /// Check if \I is a memory instruction to be widened for \p Range.Start and /// potentially masked. Such instructions are handled by a recipe that takes Loading
llvm/lib/Transforms/Vectorize/VPlan.cpp +20 −3 Original line number Diff line number Diff line Loading @@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) { } void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insert(InsertPos->getIterator(), this); } void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); } void VPRecipeBase::removeFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); getParent()->getRecipeList().remove(getIterator()); Parent = nullptr; } iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); return getParent()->getRecipeList().erase(getIterator()); } void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { InsertPos->getParent()->getRecipeList().splice( std::next(InsertPos->getIterator()), getParent()->getRecipeList(), getIterator()); removeFromParent(); insertAfter(InsertPos); } void VPInstruction::generateInstruction(VPTransformState &State, Loading