Unverified Commit b2f9210e authored by Alexey Bataev's avatar Alexey Bataev Committed by GitHub
Browse files

[SLP] Keep loop BTCs across CurrentLoopNest truncations

Record SCEV backedge-taken counts (BTCs) in a per-depth vector so that a
later loop nest reaching a previously merged depth via the empty, divergence,
or extend branch in buildTreeRec is re-validated against the recorded count.

Reviewers: 

Pull Request: https://github.com/llvm/llvm-project/pull/195411
parent 0605d2c1
Loading
Loading
Loading
Loading
Loading
+39 −4
Original line number Diff line number Diff line
@@ -2287,6 +2287,7 @@ public:
    ValueToGatherNodes.clear();
    TreeEntryToStridedPtrInfoMap.clear();
    CurrentLoopNest.clear();
    MergedLoopBTCs.clear();
  }
  /// Returns the number of entries currently held in VectorizableTree.
  unsigned getTreeSize() const { return VectorizableTree.size(); }
@@ -4930,6 +4931,10 @@ private:
  /// multiple, to avoid side-effects from the loop-aware cost model.
  SmallVector<const Loop *> CurrentLoopNest;
  /// Per-depth SCEV backedge-taken counts, recorded at every loop-nest depth
  /// where the tree builder has joined diverging sibling loops. An entry is
  /// null when no count has been recorded for that depth.
  SmallVector<const SCEV *> MergedLoopBTCs;
  /// Maps the loops to their loop nests.
  SmallDenseMap<const Loop *, SmallVector<const Loop *>> LoopToLoopNest;
@@ -12694,7 +12699,29 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
            break;
          ++CommonLen;
        }
        // Checks NewLoopNest against the backedge-taken counts recorded in
        // MergedLoopBTCs for depths in [StartDepth, EndDepth), where EndDepth
        // is the smaller of the two container sizes. Returns false if, at any
        // checked depth with a recorded count, the loop's backedge-taken count
        // cannot be computed or is a different SCEV than the recorded one;
        // returns true otherwise (including when the range is empty).
        auto ValidateMergedBTCs = [&](unsigned StartDepth) -> bool {
          // Only depths present in both containers can be compared.
          unsigned EndDepth =
              std::min<unsigned>(NewLoopNest.size(), MergedLoopBTCs.size());
          for (unsigned D = StartDepth; D < EndDepth; ++D) {
            const SCEV *Constraint = MergedLoopBTCs[D];
            // A null entry means nothing was recorded for this depth.
            if (!Constraint)
              continue;
            const SCEV *NewBTC = SE->getBackedgeTakenCount(NewLoopNest[D]);
            // The counts are compared by SCEV pointer identity.
            if (isa<SCEVCouldNotCompute>(NewBTC) || NewBTC != Constraint)
              return false;
          }
          return true;
        };
        // Emits the trip-count mismatch debug message and creates a gather
        // tree entry for VL. It does not leave the enclosing function itself;
        // each call site follows it with an explicit return.
        auto BailOutToGather = [&]() {
          LLVM_DEBUG(dbgs()
                     << "SLP: Sibling loops have different trip counts.\n");
          newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
        };
        if (CurrentLoopNest.empty()) {
          if (!ValidateMergedBTCs(0)) {
            BailOutToGather();
            return;
          }
          CurrentLoopNest.assign(NewLoopNest);
        } else if (CommonLen < CurrentLoopNest.size() &&
                   CommonLen < NewLoopNest.size()) {
@@ -12711,14 +12738,22 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
          const SCEV *BecA = SE->getBackedgeTakenCount(SibA);
          const SCEV *BecB = SE->getBackedgeTakenCount(SibB);
          if (isa<SCEVCouldNotCompute>(BecA) || BecA != BecB) {
            LLVM_DEBUG(dbgs()
                       << "SLP: Sibling loops have different trip counts.\n");
            newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
            BailOutToGather();
            return;
          }
          if (!ValidateMergedBTCs(CommonLen + 1)) {
            BailOutToGather();
            return;
          }
          if (MergedLoopBTCs.size() <= CommonLen)
            MergedLoopBTCs.resize(CommonLen + 1, nullptr);
          MergedLoopBTCs[CommonLen] = BecA;
          CurrentLoopNest.truncate(CommonLen);
        } else if (NewLoopNest.size() > CurrentLoopNest.size()) {
          // New entry lives deeper in the same nest chain; extend.
          if (!ValidateMergedBTCs(CurrentLoopNest.size())) {
            BailOutToGather();
            return;
          }
          CurrentLoopNest.append(
              std::next(NewLoopNest.begin(), CurrentLoopNest.size()),
              NewLoopNest.end());
+24 −6
Original line number Diff line number Diff line
@@ -9,18 +9,36 @@
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '31'
; YAML-NEXT:   - Cost:            '27'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML-NEXT:   - TreeSize:        '9'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '29'
; YAML-NEXT:   - Cost:            '25'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML-NEXT:   - TreeSize:        '9'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            VectorizedList
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '4'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '4'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            VectorizedList
; YAML-NEXT: Function:        sibling_root_loops_mismatched_tripcount
; YAML-NEXT: Args:
; YAML-NEXT:   - String:          'SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '4'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '4'
define void @sibling_root_loops_mismatched_tripcount(ptr %dst, i64 %n, i64 %m, i32 %sel) {
; CHECK-LABEL: define void @sibling_root_loops_mismatched_tripcount(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]], i32 [[SEL:%.*]]) {
@@ -125,7 +143,7 @@ merge:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML-NEXT:   - TreeSize:        '9'
; YAML:      --- !Passed
; YAML-NEXT: Pass:            slp-vectorizer
; YAML-NEXT: Name:            StoresVectorized
@@ -134,7 +152,7 @@ merge:
; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
; YAML-NEXT:   - Cost:            '200'
; YAML-NEXT:   - String:          ' and with tree size '
; YAML-NEXT:   - TreeSize:        '11'
; YAML-NEXT:   - TreeSize:        '9'
define void @sibling_inner_loops_mismatched_tripcount(ptr %dst, i64 %n, i64 %m, i32 %sel) {
; CHECK-LABEL: define void @sibling_inner_loops_mismatched_tripcount(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]], i32 [[SEL:%.*]]) {