Commit de71def3 authored by Sam Parker

[CostModel] Unify Intrinsic Costs.

With the two getIntrinsicInstrCosts folded into one, now fold in the
scalar/code-size oriented getIntrinsicCost. This involved sinking the
cost logic of the TTIImpl into the base implementation, as it performs
no target checks. The opcodes that remained were memcpy, cttz and ctlz,
which now have special handling in the BasicTTI implementation.
getInstructionThroughput can now directly return the result of
getUserCost.

This required a change in the AMDGPU backend for fabs, since it is
always 'free' there. I've also changed the X86 backend to return '1'
for any intrinsic when the CostKind isn't RecipThroughput.

Though this is intended to be a non-functional change, many paths are
being combined here, so I would be very surprised if it had no effect.

Differential Revision: https://reviews.llvm.org/D80012
parent 7606a543
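For readers skimming the diff below: the net effect is that every cost query for a call now funnels through a single entry point. A minimal sketch of the consolidated usage, hand-written for illustration (the helper, TTI and I are hypothetical, not from the patch):

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    // After this patch a pass no longer chooses between getIntrinsicCost and
    // getIntrinsicInstrCost; it asks getUserCost, which dispatches internally.
    static int queryCallCost(const TargetTransformInfo &TTI, const Instruction *I,
                             TargetTransformInfo::TargetCostKind Kind) {
      return TTI.getUserCost(I, Kind);
    }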
@@ -38,6 +38,7 @@ class AssumptionCache;
 class BlockFrequencyInfo;
 class DominatorTree;
 class BranchInst;
+class CallBase;
 class Function;
 class GlobalValue;
 class IntrinsicInst;
@@ -120,10 +121,12 @@ class IntrinsicCostAttributes {
 public:
   IntrinsicCostAttributes(const IntrinsicInst &I);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                           unsigned Factor);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                           unsigned Factor, unsigned ScalarCost);
 
   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
@@ -141,7 +144,7 @@ public:
   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                           ArrayRef<Type *> Tys);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                           ArrayRef<Value *> Args);
 
   Intrinsic::ID getID() const { return IID; }
@@ -288,18 +291,6 @@ public:
   /// scientific. A target may has no bonus on vector instructions.
   int getInlinerVectorBonusPercent() const;
 
-  /// Estimate the cost of an intrinsic when lowered.
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
-
-  /// Estimate the cost of an intrinsic when lowered.
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
-
   /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
   int getMemcpyCost(const Instruction *I) const;
@@ -1231,13 +1222,6 @@ public:
                                      TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual int getInlinerVectorBonusPercent() = 0;
-  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<Type *> ParamTys, const User *U,
-                               enum TargetCostKind CostKind) = 0;
-  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<const Value *> Arguments,
-                               const User *U,
-                               enum TargetCostKind CostKind) = 0;
   virtual int getMemcpyCost(const Instruction *I) = 0;
   virtual unsigned
   getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
@@ -1495,18 +1479,6 @@ public:
   int getInlinerVectorBonusPercent() override {
     return Impl.getInlinerVectorBonusPercent();
   }
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
-    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
-  }
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
-    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  }
   int getMemcpyCost(const Instruction *I) override {
     return Impl.getMemcpyCost(I);
   }
......
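With these declarations gone, any remaining caller of TTI::getIntrinsicCost needs an equivalent. A sketch of the replacement pattern, assuming the caller has the argument values at hand (the helper name is hypothetical):

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    static int scalarIntrinsicCost(const TargetTransformInfo &TTI,
                                   Intrinsic::ID IID, Type *RetTy,
                                   ArrayRef<Value *> Args) {
      // IntrinsicCostAttributes bundles what the removed overloads took as
      // separate parameters; TCK_SizeAndLatency matches the old default.
      IntrinsicCostAttributes Attrs(IID, RetTy, Args);
      return TTI.getIntrinsicInstrCost(Attrs,
                                       TargetTransformInfo::TCK_SizeAndLatency);
    }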
@@ -462,6 +462,39 @@ public:
   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
+    switch (ICA.getID()) {
+    default:
+      break;
+    case Intrinsic::annotation:
+    case Intrinsic::assume:
+    case Intrinsic::sideeffect:
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::dbg_label:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::launder_invariant_group:
+    case Intrinsic::strip_invariant_group:
+    case Intrinsic::is_constant:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+    case Intrinsic::experimental_gc_result:
+    case Intrinsic::experimental_gc_relocate:
+    case Intrinsic::coro_alloc:
+    case Intrinsic::coro_begin:
+    case Intrinsic::coro_free:
+    case Intrinsic::coro_end:
+    case Intrinsic::coro_frame:
+    case Intrinsic::coro_size:
+    case Intrinsic::coro_suspend:
+    case Intrinsic::coro_param:
+    case Intrinsic::coro_subfn_addr:
+      // These intrinsics don't actually represent code after lowering.
+      return 0;
+    }
     return 1;
   }
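The effect of the block above: with no target information at all, bookkeeping intrinsics report zero cost and everything else costs one unit. A hypothetical query for illustration, where TTI and II (an IntrinsicInst* for an llvm.dbg.value call) are assumed:

    IntrinsicCostAttributes Attrs(*II);
    int C = TTI.getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_CodeSize);
    // C == 0: the call vanishes during lowering. For, say, llvm.sqrt the same
    // query would fall through the switch and return 1.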
@@ -739,78 +772,32 @@ public:
     return TTI::TCC_Basic;
   }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U,
-                            TTI::TargetCostKind TCK_SizeAndLatency) {
-    switch (IID) {
-    default:
-      // Intrinsics rarely (if ever) have normal argument setup constraints.
-      // Model them as having a basic instruction cost.
-      return TTI::TCC_Basic;
-
-    // TODO: other libc intrinsics.
-    case Intrinsic::memcpy:
-      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
-
-    case Intrinsic::annotation:
-    case Intrinsic::assume:
-    case Intrinsic::sideeffect:
-    case Intrinsic::dbg_declare:
-    case Intrinsic::dbg_value:
-    case Intrinsic::dbg_label:
-    case Intrinsic::invariant_start:
-    case Intrinsic::invariant_end:
-    case Intrinsic::launder_invariant_group:
-    case Intrinsic::strip_invariant_group:
-    case Intrinsic::is_constant:
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-    case Intrinsic::objectsize:
-    case Intrinsic::ptr_annotation:
-    case Intrinsic::var_annotation:
-    case Intrinsic::experimental_gc_result:
-    case Intrinsic::experimental_gc_relocate:
-    case Intrinsic::coro_alloc:
-    case Intrinsic::coro_begin:
-    case Intrinsic::coro_free:
-    case Intrinsic::coro_end:
-    case Intrinsic::coro_frame:
-    case Intrinsic::coro_size:
-    case Intrinsic::coro_suspend:
-    case Intrinsic::coro_param:
-    case Intrinsic::coro_subfn_addr:
-      // These intrinsics don't actually represent code after lowering.
-      return TTI::TCC_Free;
-    }
-  }
-
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    // Delegate to the generic intrinsic handling code. This mostly provides an
-    // opportunity for targets to (for example) special case the cost of
-    // certain intrinsics based on constants used as arguments.
-    SmallVector<Type *, 8> ParamTys;
-    ParamTys.reserve(Arguments.size());
-    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
-      ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
-                                                    CostKind);
-  }
-
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
-                       TTI::TargetCostKind CostKind) {
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                  TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast<T *>(this);
 
-    // FIXME: Unlikely to be true for anything but CodeSize.
     if (const auto *CB = dyn_cast<CallBase>(U)) {
+      // Special-case throughput here because it can make wild differences
+      // whether the arguments are passed around or just the arg types. The
+      // IntrinsicCostAttribute constructor used here will save all the
+      // available information.
+      // FIXME: More information isn't always useful and we shouldn't have to
+      // make a special case like this.
+      if (CostKind == TTI::TCK_RecipThroughput) {
+        if (auto *II = dyn_cast<IntrinsicInst>(CB)) {
+          IntrinsicCostAttributes Attrs(*II);
+          return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
+        }
+        return -1; // We know nothing about this call.
+      }
+
+      // FIXME: Unlikely to be true for anything but CodeSize.
       const Function *F = CB->getCalledFunction();
       if (F) {
         FunctionType *FTy = F->getFunctionType();
         if (Intrinsic::ID IID = F->getIntrinsicID()) {
-          SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
-          return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
-                                             ParamTys, U, CostKind);
+          IntrinsicCostAttributes Attrs(IID, *CB);
+          return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
         }
 
         if (!TargetTTI->isLoweredToCall(F))
......
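The asymmetry above is worth spelling out: for throughput the attributes are built from the IntrinsicInst itself, so argument values (constant shift amounts, masks, and so on) can influence the cost, while every other cost kind sees only the parameter types from the callee's signature. A sketch of the two resulting queries, with TTI, CB (a pointer to an intrinsic call site) and IID assumed:

    // Throughput: full information, argument values included.
    IntrinsicCostAttributes ThruAttrs(*cast<IntrinsicInst>(CB));
    int Thru = TTI.getIntrinsicInstrCost(ThruAttrs,
                                         TargetTransformInfo::TCK_RecipThroughput);

    // Size/latency/code-size: types only, taken from the called function.
    IntrinsicCostAttributes SizeAttrs(IID, *CB);
    int Size = TTI.getIntrinsicInstrCost(SizeAttrs,
                                         TargetTransformInfo::TCK_SizeAndLatency);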
@@ -296,30 +296,6 @@ public:
     return BaseT::getGEPCost(PointeeType, Ptr, Operands);
   }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  }
-
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    if (IID == Intrinsic::cttz) {
-      if (getTLI()->isCheapToSpeculateCttz())
-        return TargetTransformInfo::TCC_Basic;
-      return TargetTransformInfo::TCC_Expensive;
-    }
-
-    if (IID == Intrinsic::ctlz) {
-      if (getTLI()->isCheapToSpeculateCtlz())
-        return TargetTransformInfo::TCC_Basic;
-      return TargetTransformInfo::TCC_Expensive;
-    }
-
-    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
-  }
-
   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                             unsigned &JumpTableSize,
                                             ProfileSummaryInfo *PSI,
@@ -1091,21 +1067,40 @@ public:
   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
+    Intrinsic::ID IID = ICA.getID();
+    auto *ConcreteTTI = static_cast<T *>(this);
+
+    // Special case some scalar intrinsics.
+    if (CostKind != TTI::TCK_RecipThroughput) {
+      switch (IID) {
+      default:
+        break;
+      case Intrinsic::cttz:
+        if (getTLI()->isCheapToSpeculateCttz())
+          return TargetTransformInfo::TCC_Basic;
+        break;
+      case Intrinsic::ctlz:
+        if (getTLI()->isCheapToSpeculateCtlz())
+          return TargetTransformInfo::TCC_Basic;
+        break;
+      case Intrinsic::memcpy:
+        return ConcreteTTI->getMemcpyCost(ICA.getInst());
+      }
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+    }
+
     // TODO: Combine these two logic paths.
     if (ICA.isTypeBasedOnly())
       return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
 
-    Intrinsic::ID IID = ICA.getID();
     const IntrinsicInst *I = ICA.getInst();
-    Type *RetTy = ICA.getReturnType();
     const SmallVectorImpl<Value *> &Args = ICA.getArgs();
-    unsigned VF = ICA.getVectorFactor();
     FastMathFlags FMF = ICA.getFlags();
+    Type *RetTy = ICA.getReturnType();
+    unsigned VF = ICA.getVectorFactor();
     unsigned RetVF =
         (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
     assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
-    auto *ConcreteTTI = static_cast<T *>(this);
 
     switch (IID) {
     default: {
@@ -1592,13 +1587,14 @@ public:
                                                  CostKind) +
              ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
                                                  CostKind);
-    if (IID == Intrinsic::experimental_constrained_fmuladd)
-      return ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
-                 CostKind) +
-             ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
-                 CostKind);
+    if (IID == Intrinsic::experimental_constrained_fmuladd) {
+      IntrinsicCostAttributes FMulAttrs(
+          Intrinsic::experimental_constrained_fmul, RetTy, Tys);
+      IntrinsicCostAttributes FAddAttrs(
+          Intrinsic::experimental_constrained_fadd, RetTy, Tys);
+      return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) +
+             ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind);
+    }
 
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
......
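One subtlety in the non-throughput block above: when cttz/ctlz are not cheap to speculate, control now falls through to the base implementation's 0-or-1 answer instead of the old TCC_Expensive, which is exactly the kind of combined-path effect the commit message warns about. A sketch of the new behaviour, assuming TTI, Int32Ty and Tys are at hand and the target reports isCheapToSpeculateCttz():

    IntrinsicCostAttributes Attrs(Intrinsic::cttz, Int32Ty, Tys);
    int C = TTI.getIntrinsicInstrCost(Attrs,
                                      TargetTransformInfo::TCK_SizeAndLatency);
    // C == TCC_Basic when cttz is cheap to speculate; otherwise the query
    // falls through to the generic default rather than returning TCC_Expensive.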
@@ -63,7 +63,20 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
     FMF = FPMO->getFastMathFlags();
 }
 
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI) :
+    II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
+
+  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
+    FMF = FPMO->getFastMathFlags();
+
+  FunctionType *FTy =
+      CI.getCalledFunction()->getFunctionType();
+  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI,
                                                  unsigned Factor) :
     RetTy(CI.getType()), IID(Id), VF(Factor) {
@@ -76,7 +89,8 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
   ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
 }
 
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI,
                                                  unsigned Factor,
                                                  unsigned ScalarCost) :
     RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
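The new CallBase-based constructors make the attributes buildable from any direct call site, not just a CallInst. A minimal sketch of the intended use (the helper name is hypothetical, and a direct intrinsic callee is assumed):

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    static int costOfIntrinsicCall(const TargetTransformInfo &TTI,
                                   const CallBase &CB,
                                   TargetTransformInfo::TargetCostKind Kind) {
      Intrinsic::ID IID = CB.getCalledFunction()->getIntrinsicID();
      // Records the callee's parameter types, any fast-math flags, and the
      // IntrinsicInst itself when CB happens to be one.
      IntrinsicCostAttributes Attrs(IID, CB);
      return TTI.getIntrinsicInstrCost(Attrs, Kind);
    }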
@@ -236,15 +250,6 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
   return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
 }
 
-int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                                          ArrayRef<const Value *> Arguments,
-                                          const User *U,
-                                          TTI::TargetCostKind CostKind) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
     const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
     BlockFrequencyInfo *BFI) const {
@@ -1416,11 +1421,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
   }
   case Instruction::Call:
-    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      IntrinsicCostAttributes CostAttrs(*II);
-      return getIntrinsicInstrCost(CostAttrs, CostKind);
-    }
-    return -1;
+    return getUserCost(I, CostKind);
   default:
     // We don't have any information on this instruction.
     return -1;
......
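With the Call case routed through getUserCost, throughput queries on calls now share the same path as every other cost kind. For example (hand-written, with TTI and a call instruction I assumed):

    int Thru = TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
    int Size = TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
    // Both reach getIntrinsicInstrCost through the single getUserCost path
    // when I is an intrinsic call.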
@@ -560,6 +560,9 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
 int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
+  if (ICA.getID() == Intrinsic::fabs)
+    return 0;
+
   if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
     return BaseT::getIntrinsicInstrCost(ICA, CostKind);
......
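The rationale for the early-out above: on GCN, fabs folds into the consuming instruction as a source modifier, so it costs nothing whatever the cost kind. A hypothetical query (GCNTTI and FloatTy assumed):

    IntrinsicCostAttributes Attrs(Intrinsic::fabs, FloatTy, {FloatTy});
    int C = GCNTTI.getIntrinsicInstrCost(Attrs,
                                         TargetTransformInfo::TCK_RecipThroughput);
    // C == 0 on AMDGPU for every TargetCostKind after this change.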
@@ -2699,6 +2699,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
 int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   if (ICA.isTypeBasedOnly())
     return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
@@ -3932,6 +3935,9 @@ int X86TTIImpl::getGatherScatterOpCost(
                                        unsigned Alignment, TTI::TargetCostKind CostKind,
                                        const Instruction *I = nullptr) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
......
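Both X86 early-outs implement what the commit message describes: anything that isn't a throughput query now gets a flat cost of 1, a deliberately blunt placeholder rather than a per-intrinsic model. Sketch, with X86TTI and Attrs assumed:

    int Size = X86TTI.getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_CodeSize);
    // Size == 1 for every intrinsic; only TCK_RecipThroughput reaches the
    // detailed per-intrinsic cost tables.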
@@ -1535,7 +1535,7 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   //   %inc = add nsw %i.0, 1
   //   br i1 %tobool
 
-  const Value *Args[] =
+  Value *Args[] =
       {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
                         : ConstantInt::getFalse(InitX->getContext())};
@@ -1544,9 +1544,11 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   uint32_t HeaderSize =
       std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
+  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+  int Cost =
+      TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
   if (HeaderSize != IdiomCanonicalSize &&
-      TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
-          TargetTransformInfo::TCC_Basic)
+      Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
......
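For context, this cost check gates the FFS idiom transform: a bit-counting loop is rewritten into a single intrinsic call only when the target models ctlz/cttz as one basic instruction. One shape of loop the pass recognizes, sketched as ordinary C++ (illustrative, not taken from the patch):

    // Counts how many bits are needed to represent X; LoopIdiomRecognize can
    // rewrite the loop into a form using a single llvm.ctlz call when the
    // cost check above passes.
    unsigned bitWidthOf(unsigned X) {
      unsigned N = 0;
      while (X) {
        ++N;
        X >>= 1;
      }
      return N;
    }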