Loading llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +55 −67 Original line number Diff line number Diff line Loading @@ -548,10 +548,6 @@ protected: /// represented as. void truncateToMinimalBitwidths(); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); /// Create a broadcast instruction. This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... Loading Loading @@ -2708,14 +2704,18 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, "min.iters.check"); // Create new preheader for vector loop. LoopVectorPreHeader = TCCheckBlock->splitBasicBlock( TCCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, TCCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); assert(DT->properlyDominates(DT->getNode(TCCheckBlock), DT->getNode(Bypass)->getIDom()) && "TC check is expected to dominate Bypass"); // Update dominator for Bypass & LoopExit. DT->changeImmediateDominator(Bypass, TCCheckBlock); DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock); ReplaceInstWithInst( TCCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters)); Loading Loading @@ -2744,14 +2744,16 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { SCEVCheckBlock->setName("vector.scevcheck"); // Create new preheader for vector loop. LoopVectorPreHeader = SCEVCheckBlock->splitBasicBlock( SCEVCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, SCEVCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(SCEVCheckBlock, SCEVCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); // Update dominator only if this is first RT check. if (LoopBypassBlocks.empty()) { DT->changeImmediateDominator(Bypass, SCEVCheckBlock); DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock); } ReplaceInstWithInst( SCEVCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck)); Loading Loading @@ -2794,15 +2796,16 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { MemCheckBlock->setName("vector.memcheck"); // Create new preheader for vector loop. LoopVectorPreHeader = MemCheckBlock->splitBasicBlock( MemCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, MemCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); // Update dominator only if this is first RT check. if (LoopBypassBlocks.empty()) { DT->changeImmediateDominator(Bypass, MemCheckBlock); DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock); } ReplaceInstWithInst( MemCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck)); Loading Loading @@ -2957,15 +2960,24 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); LoopExitBlock = OrigLoop->getExitBlock(); assert(LoopVectorPreHeader && "Invalid loop structure"); assert(LoopExitBlock && "Must have an exit block"); assert(LoopVectorPreHeader && "Invalid loop structure"); LoopVectorBody = LoopVectorPreHeader->splitBasicBlock( LoopVectorPreHeader->getTerminator(), "vector.body"); LoopMiddleBlock = LoopVectorBody->splitBasicBlock( LoopVectorBody->getTerminator(), "middle.block"); LoopScalarPreHeader = LoopMiddleBlock->splitBasicBlock( LoopMiddleBlock->getTerminator(), "scalar.ph"); LoopMiddleBlock = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI, nullptr, "middle.block"); LoopScalarPreHeader = SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI, nullptr, "scalar.ph"); // We intentionally don't let SplitBlock to update LoopInfo since // LoopVectorBody should belong to another loop than LoopVectorPreHeader. // LoopVectorBody is explicitly added to the correct place few lines later. LoopVectorBody = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, nullptr, nullptr, "vector.body"); // Update dominator for loop exit. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); // Create and register the new vector loop. Loop *Lp = LI->AllocateLoop(); Loading @@ -2975,8 +2987,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { // before calling any utilities such as SCEV that require valid LoopInfo. if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(LoopScalarPreHeader, *LI); ParentLoop->addBasicBlockToLoop(LoopMiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); } Loading Loading @@ -3116,6 +3126,11 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopVectorizeHints Hints(Lp, true, *ORE); Hints.setAlreadyVectorized(); #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); LI->verify(*DT); #endif return LoopVectorPreHeader; } Loading Loading @@ -3451,15 +3466,8 @@ void InnerLoopVectorizer::fixVectorizedLoop() { // This is the second stage of vectorizing recurrences. fixCrossIterationPHIs(); // Update the dominator tree. // // FIXME: After creating the structure of the new loop, the dominator tree is // no longer up-to-date, and it remains that way until we update it // here. An out-of-date dominator tree is problematic for SCEV, // because SCEVExpander uses it to guide code generation. The // vectorizer use SCEVExpanders in several places. Instead, we should // keep the dominator tree up-to-date as we go. updateAnalysis(); // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // Fix-up external users of the induction variables. for (auto &Entry : *Legal->getInductionVars()) Loading Loading @@ -4403,26 +4411,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { } // end of switch. } void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // DT is not kept up-to-date for outer loop vectorization if (EnableVPlanNativePath) return; // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); DT->addNewBlock(LoopMiddleBlock, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does Loading llvm/lib/Transforms/Vectorize/VPlan.cpp +7 −3 Original line number Diff line number Diff line Loading @@ -465,7 +465,8 @@ void VPlan::execute(VPTransformState *State) { // We do not attempt to preserve DT for outer loop vectorization currently. if (!EnableVPlanNativePath) updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, L->getExitBlock()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) Loading @@ -474,10 +475,10 @@ void VPlan::dump() const { dbgs() << *this << '\n'; } #endif void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB) { BasicBlock *LoopLatchBB, BasicBlock *LoopExitBB) { BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); assert(LoopHeaderBB && "Loop preheader does not have a single successor."); DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); // The vector body may be more than a single basic-block by this point. // Update the dominator tree information inside the vector body by propagating // it from header to latch, expecting only triangular control-flow, if any. Loading Loading @@ -508,6 +509,9 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, DT->addNewBlock(InterimSucc, BB); DT->addNewBlock(PostDomSucc, BB); } // Latch block is a new dominator for the loop exit. DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { Loading llvm/lib/Transforms/Vectorize/VPlan.h +2 −2 Original line number Diff line number Diff line Loading @@ -1424,9 +1424,9 @@ public: private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. static void updateDominatorTree(DominatorTree *DT, static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB); BasicBlock *LoopExitBB); }; /// VPlanPrinter prints a given VPlan to a given output stream. The printing is Loading Loading
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +55 −67 Original line number Diff line number Diff line Loading @@ -548,10 +548,6 @@ protected: /// represented as. void truncateToMinimalBitwidths(); /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); /// Create a broadcast instruction. This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... Loading Loading @@ -2708,14 +2704,18 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, "min.iters.check"); // Create new preheader for vector loop. LoopVectorPreHeader = TCCheckBlock->splitBasicBlock( TCCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, TCCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); assert(DT->properlyDominates(DT->getNode(TCCheckBlock), DT->getNode(Bypass)->getIDom()) && "TC check is expected to dominate Bypass"); // Update dominator for Bypass & LoopExit. DT->changeImmediateDominator(Bypass, TCCheckBlock); DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock); ReplaceInstWithInst( TCCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters)); Loading Loading @@ -2744,14 +2744,16 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { SCEVCheckBlock->setName("vector.scevcheck"); // Create new preheader for vector loop. LoopVectorPreHeader = SCEVCheckBlock->splitBasicBlock( SCEVCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, SCEVCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(SCEVCheckBlock, SCEVCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); // Update dominator only if this is first RT check. if (LoopBypassBlocks.empty()) { DT->changeImmediateDominator(Bypass, SCEVCheckBlock); DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock); } ReplaceInstWithInst( SCEVCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck)); Loading Loading @@ -2794,15 +2796,16 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { MemCheckBlock->setName("vector.memcheck"); // Create new preheader for vector loop. LoopVectorPreHeader = MemCheckBlock->splitBasicBlock( MemCheckBlock->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass // checks may query it before the current function is finished. DT->addNewBlock(LoopVectorPreHeader, MemCheckBlock); if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(LoopVectorPreHeader, *LI); LoopVectorPreHeader = SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr, "vector.ph"); // Update dominator only if this is first RT check. if (LoopBypassBlocks.empty()) { DT->changeImmediateDominator(Bypass, MemCheckBlock); DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock); } ReplaceInstWithInst( MemCheckBlock->getTerminator(), BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck)); Loading Loading @@ -2957,15 +2960,24 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); LoopExitBlock = OrigLoop->getExitBlock(); assert(LoopVectorPreHeader && "Invalid loop structure"); assert(LoopExitBlock && "Must have an exit block"); assert(LoopVectorPreHeader && "Invalid loop structure"); LoopVectorBody = LoopVectorPreHeader->splitBasicBlock( LoopVectorPreHeader->getTerminator(), "vector.body"); LoopMiddleBlock = LoopVectorBody->splitBasicBlock( LoopVectorBody->getTerminator(), "middle.block"); LoopScalarPreHeader = LoopMiddleBlock->splitBasicBlock( LoopMiddleBlock->getTerminator(), "scalar.ph"); LoopMiddleBlock = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI, nullptr, "middle.block"); LoopScalarPreHeader = SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI, nullptr, "scalar.ph"); // We intentionally don't let SplitBlock to update LoopInfo since // LoopVectorBody should belong to another loop than LoopVectorPreHeader. // LoopVectorBody is explicitly added to the correct place few lines later. LoopVectorBody = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, nullptr, nullptr, "vector.body"); // Update dominator for loop exit. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); // Create and register the new vector loop. Loop *Lp = LI->AllocateLoop(); Loading @@ -2975,8 +2987,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { // before calling any utilities such as SCEV that require valid LoopInfo. if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(LoopScalarPreHeader, *LI); ParentLoop->addBasicBlockToLoop(LoopMiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); } Loading Loading @@ -3116,6 +3126,11 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopVectorizeHints Hints(Lp, true, *ORE); Hints.setAlreadyVectorized(); #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); LI->verify(*DT); #endif return LoopVectorPreHeader; } Loading Loading @@ -3451,15 +3466,8 @@ void InnerLoopVectorizer::fixVectorizedLoop() { // This is the second stage of vectorizing recurrences. fixCrossIterationPHIs(); // Update the dominator tree. // // FIXME: After creating the structure of the new loop, the dominator tree is // no longer up-to-date, and it remains that way until we update it // here. An out-of-date dominator tree is problematic for SCEV, // because SCEVExpander uses it to guide code generation. The // vectorizer use SCEVExpanders in several places. Instead, we should // keep the dominator tree up-to-date as we go. updateAnalysis(); // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // Fix-up external users of the induction variables. for (auto &Entry : *Legal->getInductionVars()) Loading Loading @@ -4403,26 +4411,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { } // end of switch. } void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); // DT is not kept up-to-date for outer loop vectorization if (EnableVPlanNativePath) return; // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); DT->addNewBlock(LoopMiddleBlock, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does Loading
llvm/lib/Transforms/Vectorize/VPlan.cpp +7 −3 Original line number Diff line number Diff line Loading @@ -465,7 +465,8 @@ void VPlan::execute(VPTransformState *State) { // We do not attempt to preserve DT for outer loop vectorization currently. if (!EnableVPlanNativePath) updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, L->getExitBlock()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) Loading @@ -474,10 +475,10 @@ void VPlan::dump() const { dbgs() << *this << '\n'; } #endif void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB) { BasicBlock *LoopLatchBB, BasicBlock *LoopExitBB) { BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); assert(LoopHeaderBB && "Loop preheader does not have a single successor."); DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); // The vector body may be more than a single basic-block by this point. // Update the dominator tree information inside the vector body by propagating // it from header to latch, expecting only triangular control-flow, if any. Loading Loading @@ -508,6 +509,9 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, DT->addNewBlock(InterimSucc, BB); DT->addNewBlock(PostDomSucc, BB); } // Latch block is a new dominator for the loop exit. DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { Loading
llvm/lib/Transforms/Vectorize/VPlan.h +2 −2 Original line number Diff line number Diff line Loading @@ -1424,9 +1424,9 @@ public: private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. static void updateDominatorTree(DominatorTree *DT, static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB); BasicBlock *LoopExitBB); }; /// VPlanPrinter prints a given VPlan to a given output stream. The printing is Loading