Commit 259eb619 authored by Sam Parker's avatar Sam Parker
Browse files

Revert "[CostModel] Unify Intrinsic Costs."

This reverts commit de71def3.

This is causing some very large changes, so I'm first going to break
this patch down and re-commit in parts.
parent 111ddc57
......@@ -38,7 +38,6 @@ class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class Function;
class GlobalValue;
class IntrinsicInst;
......@@ -121,12 +120,10 @@ class IntrinsicCostAttributes {
public:
IntrinsicCostAttributes(const IntrinsicInst &I);
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor);
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor, unsigned ScalarCost);
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
......@@ -144,7 +141,7 @@ public:
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<Type *> Tys);
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<Value *> Args);
Intrinsic::ID getID() const { return IID; }
......@@ -291,6 +288,18 @@ public:
/// scientific. A target may has no bonus on vector instructions.
int getInlinerVectorBonusPercent() const;
/// Estimate the cost of an intrinsic when lowered.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys,
const User *U = nullptr,
TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// Estimate the cost of an intrinsic when lowered.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U = nullptr,
TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
int getMemcpyCost(const Instruction *I) const;
......@@ -1222,6 +1231,13 @@ public:
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getInlinerVectorBonusPercent() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U,
enum TargetCostKind CostKind) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U,
enum TargetCostKind CostKind) = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
......@@ -1479,6 +1495,18 @@ public:
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys,
const User *U = nullptr,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U = nullptr,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
}
int getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
......
......@@ -462,39 +462,6 @@ public:
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
switch (ICA.getID()) {
default:
break;
case Intrinsic::annotation:
case Intrinsic::assume:
case Intrinsic::sideeffect:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
case Intrinsic::experimental_gc_result:
case Intrinsic::experimental_gc_relocate:
case Intrinsic::coro_alloc:
case Intrinsic::coro_begin:
case Intrinsic::coro_free:
case Intrinsic::coro_end:
case Intrinsic::coro_frame:
case Intrinsic::coro_size:
case Intrinsic::coro_suspend:
case Intrinsic::coro_param:
case Intrinsic::coro_subfn_addr:
// These intrinsics don't actually represent code after lowering.
return 0;
}
return 1;
}
......@@ -772,32 +739,78 @@ public:
return TTI::TCC_Basic;
}
int getUserCost(const User *U, ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U,
TTI::TargetCostKind TCK_SizeAndLatency) {
switch (IID) {
default:
// Intrinsics rarely (if ever) have normal argument setup constraints.
// Model them as having a basic instruction cost.
return TTI::TCC_Basic;
// TODO: other libc intrinsics.
case Intrinsic::memcpy:
return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
case Intrinsic::annotation:
case Intrinsic::assume:
case Intrinsic::sideeffect:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
case Intrinsic::experimental_gc_result:
case Intrinsic::experimental_gc_relocate:
case Intrinsic::coro_alloc:
case Intrinsic::coro_begin:
case Intrinsic::coro_free:
case Intrinsic::coro_end:
case Intrinsic::coro_frame:
case Intrinsic::coro_size:
case Intrinsic::coro_suspend:
case Intrinsic::coro_param:
case Intrinsic::coro_subfn_addr:
// These intrinsics don't actually represent code after lowering.
return TTI::TCC_Free;
}
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments, const User *U,
TTI::TargetCostKind CostKind) {
// Delegate to the generic intrinsic handling code. This mostly provides an
// opportunity for targets to (for example) special case the cost of
// certain intrinsics based on constants used as arguments.
SmallVector<Type *, 8> ParamTys;
ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());
return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
CostKind);
}
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
auto *TargetTTI = static_cast<T *>(this);
// FIXME: Unlikely to be true for anything but CodeSize.
if (const auto *CB = dyn_cast<CallBase>(U)) {
// Special-case throughput here because it can make wild differences
// whether the arguments are passed around or just the arg types. The
// IntrinsicCostAttribute constructor used here will save all the
// available information.
// FIXME: More information isn't always useful and we shouldn't have to
// make a special case like this.
if (CostKind == TTI::TCK_RecipThroughput) {
if (auto *II = dyn_cast<IntrinsicInst>(CB)) {
IntrinsicCostAttributes Attrs(*II);
return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
}
return -1; // We know nothing about this call.
}
// FIXME: Unlikely to be true for anything but CodeSize.
const Function *F = CB->getCalledFunction();
if (F) {
FunctionType *FTy = F->getFunctionType();
if (Intrinsic::ID IID = F->getIntrinsicID()) {
IntrinsicCostAttributes Attrs(IID, *CB);
return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
ParamTys, U, CostKind);
}
if (!TargetTTI->isLoweredToCall(F))
......
......@@ -296,6 +296,30 @@ public:
return BaseT::getGEPCost(PointeeType, Ptr, Operands);
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments, const User *U,
TTI::TargetCostKind CostKind) {
return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U,
TTI::TargetCostKind CostKind) {
if (IID == Intrinsic::cttz) {
if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic;
return TargetTransformInfo::TCC_Expensive;
}
if (IID == Intrinsic::ctlz) {
if (getTLI()->isCheapToSpeculateCtlz())
return TargetTransformInfo::TCC_Basic;
return TargetTransformInfo::TCC_Expensive;
}
return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JumpTableSize,
ProfileSummaryInfo *PSI,
......@@ -1067,40 +1091,21 @@ public:
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
Intrinsic::ID IID = ICA.getID();
auto *ConcreteTTI = static_cast<T *>(this);
// Special case some scalar intrinsics.
if (CostKind != TTI::TCK_RecipThroughput) {
switch (IID) {
default:
break;
case Intrinsic::cttz:
if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic;
break;
case Intrinsic::ctlz:
if (getTLI()->isCheapToSpeculateCtlz())
return TargetTransformInfo::TCC_Basic;
break;
case Intrinsic::memcpy:
return ConcreteTTI->getMemcpyCost(ICA.getInst());
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
// TODO: Combine these two logic paths.
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
Intrinsic::ID IID = ICA.getID();
const IntrinsicInst *I = ICA.getInst();
const SmallVectorImpl<Value *> &Args = ICA.getArgs();
FastMathFlags FMF = ICA.getFlags();
Type *RetTy = ICA.getReturnType();
const SmallVectorImpl<Value *> &Args = ICA.getArgs();
unsigned VF = ICA.getVectorFactor();
FastMathFlags FMF = ICA.getFlags();
unsigned RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
auto *ConcreteTTI = static_cast<T *>(this);
switch (IID) {
default: {
......@@ -1587,14 +1592,13 @@ public:
CostKind) +
ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
CostKind);
if (IID == Intrinsic::experimental_constrained_fmuladd) {
IntrinsicCostAttributes FMulAttrs(
Intrinsic::experimental_constrained_fmul, RetTy, Tys);
IntrinsicCostAttributes FAddAttrs(
Intrinsic::experimental_constrained_fadd, RetTy, Tys);
return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) +
ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind);
}
if (IID == Intrinsic::experimental_constrained_fmuladd)
return ConcreteTTI->getIntrinsicCost(
Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
CostKind) +
ConcreteTTI->getIntrinsicCost(
Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
CostKind);
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
......
......@@ -63,20 +63,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
FMF = FPMO->getFastMathFlags();
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
const CallBase &CI) :
II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
FunctionType *FTy =
CI.getCalledFunction()->getFunctionType();
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
const CallBase &CI,
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor) :
RetTy(CI.getType()), IID(Id), VF(Factor) {
......@@ -89,8 +76,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
const CallBase &CI,
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor,
unsigned ScalarCost) :
RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
......@@ -250,6 +236,15 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments,
const User *U,
TTI::TargetCostKind CostKind) const {
int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const {
......@@ -1421,7 +1416,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
}
case Instruction::Call:
return getUserCost(I, CostKind);
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
IntrinsicCostAttributes CostAttrs(*II);
return getIntrinsicInstrCost(CostAttrs, CostKind);
}
return -1;
default:
// We don't have any information on this instruction.
return -1;
......
......@@ -560,9 +560,6 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (ICA.getID() == Intrinsic::fabs)
return 0;
if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
......
......@@ -2699,9 +2699,6 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
......@@ -3935,9 +3932,6 @@ int X86TTIImpl::getGatherScatterOpCost(
unsigned Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
......
......@@ -1535,7 +1535,7 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// %inc = add nsw %i.0, 1
// br i1 %tobool
Value *Args[] =
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
......@@ -1544,11 +1544,9 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
uint32_t HeaderSize =
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
int Cost =
TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
if (HeaderSize != IdiomCanonicalSize &&
Cost > TargetTransformInfo::TCC_Basic)
TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment