[TTI][LV] preferPredicateOverEpilogue (6c2a4f5f) · Commits · llvm-doe / llvm-project

llvm/include/llvm/Analysis/TargetTransformInfo.h

+20 −0

Original line number	Diff line number	Diff line
		@@ -46,6 +46,7 @@ class Function;
		class GlobalValue;
		class IntrinsicInst;
		class LoadInst;
		class LoopAccessInfo;
		class Loop;
		class ProfileSummaryInfo;
		class SCEV;
		@@ -518,6 +519,13 @@ public:
		TargetLibraryInfo *LibInfo,
		HardwareLoopInfo &HWLoopInfo) const;

		/// Query the target whether it would be prefered to create a predicated vector
		/// loop, which can avoid the need to emit a scalar epilogue loop.
		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
		AssumptionCache &AC, TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) const;

		/// @}

		/// \name Scalar Target Information
		@@ -1201,6 +1209,12 @@ public:
		AssumptionCache &AC,
		TargetLibraryInfo *LibInfo,
		HardwareLoopInfo &HWLoopInfo) = 0;
		virtual bool preferPredicateOverEpilogue(Loop L, LoopInfo LI,
		ScalarEvolution &SE,
		AssumptionCache &AC,
		TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) = 0;
		// Pure-virtual immediate-legality queries: each takes a signed 64-bit
		// immediate and reports its answer as a bool.
		virtual bool isLegalAddImmediate(int64_t Imm) = 0;
		virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
		virtual bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV,
		@@ -1471,6 +1485,12 @@ public:
		HardwareLoopInfo &HWLoopInfo) override {
		return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
		}
		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
		AssumptionCache &AC, TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) override {
		return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
		}
		/// Delegates the add-immediate legality query for \p Imm to \c Impl.
		bool isLegalAddImmediate(int64_t Imm) override {
		  const bool IsLegal = Impl.isLegalAddImmediate(Imm);
		  return IsLegal;
		}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -213,6 +213,13 @@ public:
		return false;
		}

		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
		AssumptionCache &AC, TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) const {
		return false;
		}

		/// Default implementation: intentionally empty. All three arguments are
		/// unnamed and ignored, so the passed-in preferences are left untouched.
		void getUnrollingPreferences(Loop *, ScalarEvolution &,
		TTI::UnrollingPreferences &) {}

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -510,6 +510,13 @@ public:
		return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
		}

		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
		AssumptionCache &AC, TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) {
		return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
		}

		int getInstructionLatency(const Instruction *I) {
		if (isa<LoadInst>(I))
		return getST()->getSchedModel().DefaultLoadLatency;

llvm/lib/Analysis/TargetTransformInfo.cpp

+6 −0

Original line number	Diff line number	Diff line
		@@ -243,6 +243,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
		return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
		}

		bool TargetTransformInfo::preferPredicateOverEpilogue(Loop L, LoopInfo LI,
		ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI,
		DominatorTree DT, const LoopAccessInfo LAI) const {
		return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
		}

		void TargetTransformInfo::getUnrollingPreferences(
		Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
		return TTIImpl->getUnrollingPreferences(L, SE, UP);

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+44 −0

Original line number	Diff line number	Diff line
		@@ -1000,6 +1000,50 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
		return true;
		}

		bool ARMTTIImpl::preferPredicateOverEpilogue(Loop L, LoopInfo LI,
		ScalarEvolution &SE,
		AssumptionCache &AC,
		TargetLibraryInfo *TLI,
		DominatorTree *DT,
		const LoopAccessInfo *LAI) {
		// Creating a predicated vector loop is the first step for generating a
		// tail-predicated hardware loop, for which we need the MVE masked
		// load/stores instructions:
		if (!ST->hasMVEIntegerOps())
		return false;

		HardwareLoopInfo HWLoopInfo(L);
		if (!HWLoopInfo.canAnalyze(*LI)) {
		LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
		"analyzable.\n");
		return false;
		}

		// This checks if we have the low-overhead branch architecture
		// extension, and if we will create a hardware-loop:
		if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
		LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
		"profitable.\n");
		return false;
		}

		if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT)) {
		LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
		"a candidate.\n");
		return false;
		}

		// TODO: to set up a tail-predicated loop, which works by setting up
		// the total number of elements processed by the loop, we need to
		// determine the element size here, and if it is uniform for all operations
		// in the vector loop. This means we will reject narrowing/widening
		// operations, and don't want to predicate the vector loop, which is
		// the main prep step for tail-predicated loops.

		return false;
		}


		void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
		TTI::UnrollingPreferences &UP) {
		// Only currently enable these preferences for M-Class cores.