Commit 7cdebac0 authored by Adam Nemet's avatar Adam Nemet
Browse files

[LAA] Lift RuntimePointerCheck out of LoopAccessInfo, NFC

I am planning to add more nested classes inside RuntimePointerCheck, so
all this triple-nesting would be hard to follow.

Also rename it to RuntimePointerChecking (i.e. append 'ing').

llvm-svn: 242218
parent 9bbad03b
Loading
Loading
Loading
Loading
+120 −121
Original line number Diff line number Diff line
@@ -292,26 +292,10 @@ private:
  bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
};

/// \brief Drive the analysis of memory accesses in the loop
///
/// This class is responsible for analyzing the memory accesses of a loop.  It
/// collects the accesses and then its main helper the AccessAnalysis class
/// finds and categorizes the dependences in buildDependenceSets.
///
/// For memory dependences that can be analyzed at compile time, it determines
/// whether the dependence is part of cycle inhibiting vectorization.  This work
/// is delegated to the MemoryDepChecker class.
///
/// For memory dependences that cannot be determined at compile time, it
/// generates run-time checks to prove independence.  This is done by
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
/// RuntimePointerCheck class.
class LoopAccessInfo {
public:
/// This struct holds information about the memory runtime legality check that
/// a group of pointers do not overlap.
  struct RuntimePointerCheck {
    RuntimePointerCheck(ScalarEvolution *SE) : Need(false), SE(SE) {}
struct RuntimePointerChecking {
  RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {}

  /// Reset the state of the pointer runtime information.
  void reset() {
@@ -337,7 +321,7 @@ public:
  struct CheckingPtrGroup {
    /// \brief Create a new pointer checking group containing a single
    /// pointer, with index \p Index in RtCheck.
      CheckingPtrGroup(unsigned Index, RuntimePointerCheck &RtCheck)
    CheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
        : RtCheck(RtCheck), High(RtCheck.Ends[Index]),
          Low(RtCheck.Starts[Index]) {
      Members.push_back(Index);
@@ -353,7 +337,7 @@ public:
    /// Constitutes the context of this pointer checking group. For each
    /// pointer that is a member of this group we will retain the index
    /// at which it appears in RtCheck.
      RuntimePointerCheck &RtCheck;
    RuntimePointerChecking &RtCheck;
    /// The SCEV expression which represents the upper bound of all the
    /// pointers in this group.
    const SCEV *High;
@@ -382,8 +366,7 @@ public:

  /// \brief Decide if we need to add a check between two groups of pointers,
  /// according to needsChecking.
    bool needsChecking(const CheckingPtrGroup &M,
                       const CheckingPtrGroup &N,
  bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N,
                     const SmallVectorImpl<int> *PtrPartition) const;

  /// \brief Return true if any pointer requires run-time checking according
@@ -425,6 +408,22 @@ public:
  ScalarEvolution *SE;
};

/// \brief Drive the analysis of memory accesses in the loop
///
/// This class is responsible for analyzing the memory accesses of a loop.  It
/// collects the accesses and then its main helper the AccessAnalysis class
/// finds and categorizes the dependences in buildDependenceSets.
///
/// For memory dependences that can be analyzed at compile time, it determines
/// whether the dependence is part of cycle inhibiting vectorization.  This work
/// is delegated to the MemoryDepChecker class.
///
/// For memory dependences that cannot be determined at compile time, it
/// generates run-time checks to prove independence.  This is done by
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
/// RuntimePointerCheck class.
class LoopAccessInfo {
public:
  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
                 const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                 DominatorTree *DT, LoopInfo *LI,
@@ -434,15 +433,15 @@ public:
  /// no memory dependence cycles.
  bool canVectorizeMemory() const { return CanVecMem; }

  const RuntimePointerCheck *getRuntimePointerCheck() const {
    return &PtrRtCheck;
  const RuntimePointerChecking *getRuntimePointerChecking() const {
    return &PtrRtChecking;
  }

  /// \brief Number of memchecks required to prove independence of otherwise
  /// may-alias pointers.
  unsigned getNumRuntimePointerChecks(
    const SmallVectorImpl<int> *PtrPartition = nullptr) const {
    return PtrRtCheck.getNumberOfChecks(PtrPartition);
    return PtrRtChecking.getNumberOfChecks(PtrPartition);
  }

  /// Return true if the block BB needs to be predicated in order for the loop
@@ -512,7 +511,7 @@ private:

  /// We need to check that all of the pointers in this list are disjoint
  /// at runtime.
  RuntimePointerCheck PtrRtCheck;
  RuntimePointerChecking PtrRtChecking;

  /// \brief the Memory Dependence Checker which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
+41 −43
Original line number Diff line number Diff line
@@ -119,8 +119,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
  return SE->getSCEV(Ptr);
}

void LoopAccessInfo::RuntimePointerCheck::insert(
    Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
@@ -137,7 +137,7 @@ void LoopAccessInfo::RuntimePointerCheck::insert(
  Exprs.push_back(Sc);
}

bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
bool RuntimePointerChecking::needsChecking(
    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
    const SmallVectorImpl<int> *PtrPartition) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
@@ -161,8 +161,7 @@ static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
  return I;
}

bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
    unsigned Index) {
bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
@@ -186,9 +185,8 @@ bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
  return true;
}

void LoopAccessInfo::RuntimePointerCheck::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands,
    bool UseDependencies) {
void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
@@ -283,7 +281,7 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks(
  }
}

bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
bool RuntimePointerChecking::needsChecking(
    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
  // No need to check if two readonly pointers intersect.
  if (!IsWritePtr[I] && !IsWritePtr[J])
@@ -307,7 +305,7 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
  return true;
}

void LoopAccessInfo::RuntimePointerCheck::print(
void RuntimePointerChecking::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<int> *PtrPartition) const {

@@ -353,7 +351,7 @@ void LoopAccessInfo::RuntimePointerCheck::print(
  }
}

unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
unsigned RuntimePointerChecking::getNumberOfChecks(
    const SmallVectorImpl<int> *PtrPartition) const {

  unsigned NumPartitions = CheckingGroups.size();
@@ -366,7 +364,7 @@ unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
  return CheckCount;
}

bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
bool RuntimePointerChecking::needsAnyChecking(
    const SmallVectorImpl<int> *PtrPartition) const {
  unsigned NumPointers = Pointers.size();

@@ -414,9 +412,8 @@ public:
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
                       ScalarEvolution *SE, Loop *TheLoop,
                       const ValueToValueMap &Strides,
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckStride = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is needed
@@ -492,9 +489,10 @@ static bool hasComputableBounds(ScalarEvolution *SE,
  return AR->isAffine();
}

bool AccessAnalysis::canCheckPtrAtRT(
    LoopAccessInfo::RuntimePointerCheck &RtCheck, ScalarEvolution *SE,
    Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) {
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckStride) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;
@@ -1320,8 +1318,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  PtrRtCheck.Pointers.clear();
  PtrRtCheck.Need = false;
  PtrRtChecking.Pointers.clear();
  PtrRtChecking.Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

@@ -1481,7 +1479,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRTIfNeeded =
      Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides);
      Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
  if (!CanDoRTIfNeeded) {
    emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
    DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
@@ -1505,11 +1503,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(DepChecker);

      PtrRtCheck.reset();
      PtrRtCheck.Need = true;
      PtrRtChecking.reset();
      PtrRtChecking.Need = true;

      CanDoRTIfNeeded =
          Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides, true);
          Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);

      // Check that we found the bounds for the pointer.
      if (!CanDoRTIfNeeded) {
@@ -1526,7 +1524,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {

  if (CanVecMem)
    DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
                 << (PtrRtCheck.Need ? "" : " don't")
                 << (PtrRtChecking.Need ? "" : " don't")
                 << " need runtime memory checks.\n");
  else {
    emitAnalysis(LoopAccessReport() <<
@@ -1566,7 +1564,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,

std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
  if (!PtrRtCheck.Need)
  if (!PtrRtChecking.Need)
    return std::make_pair(nullptr, nullptr);

  SmallVector<TrackingVH<Value>, 2> Starts;
@@ -1576,10 +1574,10 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
  SCEVExpander Exp(*SE, DL, "induction");
  Instruction *FirstInst = nullptr;

  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
    const RuntimePointerCheck::CheckingPtrGroup &CG =
        PtrRtCheck.CheckingGroups[i];
    Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
    const RuntimePointerChecking::CheckingPtrGroup &CG =
        PtrRtChecking.CheckingGroups[i];
    Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]];
    const SCEV *Sc = SE->getSCEV(Ptr);

    if (SE->isLoopInvariant(Sc, TheLoop)) {
@@ -1606,14 +1604,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
  IRBuilder<> ChkBuilder(Loc);
  // Our instructions might fold to a constant.
  Value *MemoryRuntimeCheck = nullptr;
  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
    for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
      const RuntimePointerCheck::CheckingPtrGroup &CGI =
          PtrRtCheck.CheckingGroups[i];
      const RuntimePointerCheck::CheckingPtrGroup &CGJ =
          PtrRtCheck.CheckingGroups[j];

      if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
    for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
      const RuntimePointerChecking::CheckingPtrGroup &CGI =
          PtrRtChecking.CheckingGroups[i];
      const RuntimePointerChecking::CheckingPtrGroup &CGJ =
          PtrRtChecking.CheckingGroups[j];

      if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
        continue;

      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1664,8 +1662,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                               DominatorTree *DT, LoopInfo *LI,
                               const ValueToValueMap &Strides)
    : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI),
      AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
    : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
      TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
      MaxSafeDepDistBytes(-1U), CanVecMem(false),
      StoreToLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
@@ -1674,7 +1672,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    if (PtrRtCheck.Need)
    if (PtrRtChecking.Need)
      OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
    else
      OS.indent(Depth) << "Memory dependences are safe\n";
@@ -1693,7 +1691,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
    OS.indent(Depth) << "Too many interesting dependences, not recorded\n";

  // List the pair of accesses need run-time checks to prove independence.
  PtrRtCheck.print(OS, Depth);
  PtrRtChecking.print(OS, Depth);
  OS << "\n";

  OS.indent(Depth) << "Store to invariant address was "
+2 −3
Original line number Diff line number Diff line
@@ -432,8 +432,7 @@ public:
  /// partitions its entry is set to -1.
  SmallVector<int, 8>
  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
    const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck =
        LAI.getRuntimePointerCheck();
    const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();

    unsigned N = RtPtrCheck->Pointers.size();
    SmallVector<int, 8> PtrToPartitions(N);
@@ -753,7 +752,7 @@ private:
    LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
    if (LVer.needsRuntimeChecks()) {
      DEBUG(dbgs() << "\nPointers:\n");
      DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
      DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition));
      LVer.versionLoop(this);
      LVer.addPHINodes(DefsUsedOutside);
    }
+1 −1
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
}

bool LoopVersioning::needsRuntimeChecks() const {
  return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition);
  return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition);
}

void LoopVersioning::versionLoop(Pass *P) {
+9 −8
Original line number Diff line number Diff line
@@ -924,8 +924,8 @@ public:
  bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }

  /// Returns the information that we collected about runtime memory check.
  const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
    return LAI->getRuntimePointerCheck();
  const RuntimePointerChecking *getRuntimePointerChecking() const {
    return LAI->getRuntimePointerChecking();
  }

  const LoopAccessInfo *getLAI() const {
@@ -3873,9 +3873,10 @@ bool LoopVectorizationLegality::canVectorize() {
  // Collect all of the variables that remain uniform after vectorization.
  collectLoopUniforms();

  DEBUG(dbgs() << "LV: We can vectorize this loop" <<
        (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
         "")
  DEBUG(dbgs() << "LV: We can vectorize this loop"
               << (LAI->getRuntimePointerChecking()->Need
                       ? " (with a runtime bound check)"
                       : "")
               << "!\n");

  // Analyze interleaved memory accesses.
@@ -4449,7 +4450,7 @@ LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
  // Width 1 means no vectorize
  VectorizationFactor Factor = { 1U, 0U };
  if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
  if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
    emitAnalysis(VectorizationReport() <<
                 "runtime pointer checks needed. Enable vectorization of this "
                 "loop with '#pragma clang loop vectorize(enable)' when "
@@ -4713,7 +4714,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
  // Note that if we've already vectorized the loop we will have done the
  // runtime check and so interleaving won't require further checks.
  bool InterleavingRequiresRuntimePointerCheck =
      (VF == 1 && Legal->getRuntimePointerCheck()->Need);
      (VF == 1 && Legal->getRuntimePointerChecking()->Need);

  // We want to interleave small loops in order to reduce the loop overhead and
  // potentially expose ILP opportunities.