Commit a6d3bec8 authored by Anna Welker

[TTI][ARM][MVE] Refine gather/scatter cost model

Refines the gather/scatter cost model and also changes the TTI
function getIntrinsicInstrCost to accept an additional parameter,
the original context instruction, which is needed for the
gather/scatter cost evaluation.
This required trivial changes in some non-ARM backends to adopt
the new parameter.
Extending gathers and truncating scatters are now priced more cheaply.

Differential Revision: https://reviews.llvm.org/D75525
parent 8a125532
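For context, a minimal sketch (not part of this patch) of how a caller can now hand the original IR instruction to the cost model. The helper name costOfIntrinsicCall is hypothetical; the getIntrinsicInstrCost signature is the one introduced by this change:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Query the cost of an intrinsic call, passing the call itself as the
    // context instruction so targets can inspect its users and operands.
    static int costOfIntrinsicCall(const TargetTransformInfo &TTI,
                                   IntrinsicInst *II) {
      SmallVector<Value *, 4> Args(II->arg_begin(), II->arg_end());
      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();
      // The new trailing parameter is the context instruction; ARM MVE uses
      // it to spot extending gathers and truncating scatters.
      return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                       Args, FMF, /*VF=*/1, /*I=*/II);
    }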
@@ -966,8 +966,11 @@ public:
/// \p VariableMask - true when the memory access is predicated with a mask
/// that is not a compile-time constant
/// \p Alignment - alignment of single element
/// \p I - the optional original context instruction, if one exists, e.g. the
/// load/store to transform or the call to the gather/scatter intrinsic
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment) const;
bool VariableMask, unsigned Alignment,
const Instruction *I = nullptr) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
@@ -1006,16 +1009,22 @@ public:
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized with VF.
/// I is the optional original context instruction holding the call to the
/// intrinsic
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1) const;
unsigned VF = 1,
const Instruction *I = nullptr) const;
/// \returns The cost of Intrinsic instructions. Types analysis only.
/// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
/// arguments and the return value will be computed based on types.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX) const;
/// I is the optional original context instruction holding the call to the
/// intrinsic
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
const Instruction *I = nullptr) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
@@ -1340,9 +1349,9 @@ public:
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) = 0;
virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) = 0;
virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment,
const Instruction *I = nullptr) = 0;
virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
@@ -1355,10 +1364,12 @@ public:
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm, bool IsUnsigned) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) = 0;
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed,
const Instruction *I) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -1759,11 +1770,11 @@ public:
unsigned AddressSpace) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) override {
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment,
const Instruction *I = nullptr) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment);
Alignment, I);
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
@@ -1781,15 +1792,18 @@ public:
bool IsPairwiseForm, bool IsUnsigned) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed,
const Instruction *I) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed, I);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I) override {
return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
return Impl.getCallInstrCost(F, RetTy, Tys);
......
@@ -481,8 +481,8 @@ public:
}
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask,
unsigned Alignment) {
bool VariableMask, unsigned Alignment,
const Instruction *I = nullptr) {
return 1;
}
@@ -497,11 +497,13 @@ public:
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
unsigned ScalarizationCostPassed,
const Instruction *I) {
return 1;
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I) {
return 1;
}
......
@@ -1072,7 +1072,8 @@ public:
/// Get intrinsic cost based on arguments.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1) {
unsigned VF = 1,
const Instruction *I = nullptr) {
unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
auto *ConcreteTTI = static_cast<T *>(this);
@@ -1109,16 +1110,17 @@ public:
Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost(
Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
return ConcreteTTI->getGatherScatterOpCost(Instruction::Store,
Args[0]->getType(), Args[1],
VarMask, Alignment, I);
}
case Intrinsic::masked_gather: {
assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
Args[0], VarMask, Alignment);
return ConcreteTTI->getGatherScatterOpCost(
Instruction::Load, RetTy, Args[0], VarMask, Alignment, I);
}
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
@@ -1180,7 +1182,8 @@ public:
/// based on types.
unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
const Instruction *I = nullptr) {
auto *ConcreteTTI = static_cast<T *>(this);
SmallVector<unsigned, 2> ISDs;
......
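As a reference for the Args[] indices used in the masked_scatter/masked_gather cases above, the operand layouts of the two intrinsics are (per the LangRef; not code from this patch):

    // Operand layout assumed by the cost code above:
    //
    //   declare <N x T> @llvm.masked.gather(<N x T*> %ptrs, i32 %align,
    //                                       <N x i1> %mask, <N x T> %passthru)
    //   declare void @llvm.masked.scatter(<N x T> %val, <N x T*> %ptrs,
    //                                     i32 %align, <N x i1> %mask)
    //
    // Scatter: Args[0] = stored value, Args[1] = pointer vector,
    //          Args[2] = alignment, Args[3] = mask.
    // Gather:  Args[0] = pointer vector, Args[1] = alignment,
    //          Args[2] = mask.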
@@ -674,9 +674,10 @@ int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) const {
unsigned Alignment,
const Instruction *I) const {
int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment);
Alignment, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -694,17 +695,21 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) const {
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
const Instruction *I) const {
int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
ScalarizationCostPassed, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF,
const Instruction *I) const {
int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -1339,8 +1344,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
Args, FMF);
return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
FMF, 1, II);
}
return -1;
default:
......
@@ -478,14 +478,14 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
template <typename T>
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<T *> Args,
FastMathFlags FMF, unsigned VF) {
ArrayRef<T *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I) {
if (ID != Intrinsic::fma)
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
EVT OrigTy = TLI->getValueType(DL, RetTy);
if (!OrigTy.isSimple()) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
}
// Legalize the type.
@@ -507,16 +507,17 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
}
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value*> Args, FastMathFlags FMF,
unsigned VF) {
return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF);
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I) {
return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, I);
}
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
unsigned ScalarizationCostPassed,
const Instruction *I) {
return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
ScalarizationCostPassed, I);
}
unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
@@ -889,7 +890,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U,
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
FMF);
FMF, 1, II);
} else {
return BaseT::getUserCost(U, Operands);
}
......
@@ -219,15 +219,16 @@ public:
Type *Ty,
bool IsPairwise);
template <typename T>
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<T *> Args, FastMathFlags FMF,
unsigned VF);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
FastMathFlags FMF, unsigned VF,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX);
unsigned ScalarizationCostPassed = UINT_MAX,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1);
unsigned VF = 1, const Instruction *I = nullptr);
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
......
@@ -863,16 +863,17 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask,
unsigned Alignment) {
unsigned Alignment,
const Instruction *I) {
using namespace PatternMatch;
if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment);
Alignment, I);
assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
VectorType *VTy = cast<VectorType>(DataTy);
// TODO: Splitting, once we do that.
// TODO: trunc/sext/zext the result/input
unsigned NumElems = VTy->getNumElements();
unsigned EltSize = VTy->getScalarSizeInBits();
@@ -889,19 +890,54 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
unsigned ScalarCost =
NumElems * LT.first + BaseT::getScalarizationOverhead(DataTy, {});
// TODO: Cost extended gathers or trunc stores correctly.
if (EltSize * NumElems != 128 || NumElems < 4)
return ScalarCost;
if (Alignment < EltSize / 8)
return ScalarCost;
unsigned ExtSize = EltSize;
// Check whether there's a single user that asks for an extended type
if (I != nullptr) {
// Depending on the caller of this function, a gather instruction will
// either have opcode Instruction::Load or be a call to the masked_gather
// intrinsic
if ((I->getOpcode() == Instruction::Load ||
match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
I->hasOneUse()) {
const User *Us = *I->users().begin();
if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
// only allow valid type combinations
unsigned TypeSize =
cast<Instruction>(Us)->getType()->getScalarSizeInBits();
if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
(TypeSize == 16 && EltSize == 8)) &&
TypeSize * NumElems == 128) {
ExtSize = TypeSize;
}
}
}
// Check whether the input data needs to be truncated
TruncInst *T;
if ((I->getOpcode() == Instruction::Store ||
match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
(T = dyn_cast<TruncInst>(I->getOperand(0)))) {
// Only allow valid type combinations
unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
if (((EltSize == 16 && TypeSize == 32) ||
(EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) &&
TypeSize * NumElems == 128)
ExtSize = TypeSize;
}
}
if (ExtSize * NumElems != 128 || NumElems < 4)
return ScalarCost;
// Any (aligned) i32 gather will not need to be scalarised.
if (EltSize == 32)
if (ExtSize == 32)
return VectorCost;
// For smaller types, we need to ensure that the gep's inputs are correctly
// extended from a small enough value. Other size (including i64) are
// extended from a small enough value. Other sizes (including i64) are
// scalarized for now.
if (EltSize != 8 && EltSize != 16)
if (ExtSize != 8 && ExtSize != 16)
return ScalarCost;
if (auto BC = dyn_cast<BitCastInst>(Ptr))
@@ -911,12 +947,13 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
return ScalarCost;
unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
// Scale needs to be correct (which is only relevant for i16s).
if (Scale != 1 && Scale * 8 != EltSize)
if (Scale != 1 && Scale * 8 != ExtSize)
return ScalarCost;
// And we need to zext (not sext) the indexes from a small enough type.
if (auto ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1)))
if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= EltSize)
if (auto ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
return VectorCost;
}
return ScalarCost;
}
return ScalarCost;
......
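To make the new checks above easier to follow, here is a distilled, standalone restatement of the two pattern checks. This is a sketch under the assumption of free-standing helpers (the helper names are invented; the real logic lives inline in ARMTTIImpl::getGatherScatterOpCost):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace PatternMatch;

    // An "extending gather": the gather's single user zext/sexts the loaded
    // elements and the extended type fills a 128-bit MVE vector. Returns the
    // element size the cost model should use.
    static unsigned extendedGatherEltSize(const Instruction *I,
                                          unsigned EltSize, unsigned NumElems) {
      if (!I || !I->hasOneUse())
        return EltSize;
      if (I->getOpcode() != Instruction::Load &&
          !match(I, m_Intrinsic<Intrinsic::masked_gather>()))
        return EltSize;
      const auto *Us = dyn_cast<Instruction>(*I->user_begin());
      if (!Us || (!isa<ZExtInst>(Us) && !isa<SExtInst>(Us)))
        return EltSize;
      unsigned TypeSize = Us->getType()->getScalarSizeInBits();
      // Only the widening combinations MVE gathers support: 8->16, 8->32
      // and 16->32.
      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&
          TypeSize * NumElems == 128)
        return TypeSize;
      return EltSize;
    }

    // A "truncating scatter": the stored value is a trunc from a wider type
    // that fills a 128-bit MVE vector, mirroring the narrowing store forms.
    static unsigned truncatedScatterEltSize(const Instruction *I,
                                            unsigned EltSize,
                                            unsigned NumElems) {
      if (!I || (I->getOpcode() != Instruction::Store &&
                 !match(I, m_Intrinsic<Intrinsic::masked_scatter>())))
        return EltSize;
      auto *T = dyn_cast<TruncInst>(I->getOperand(0));
      if (!T)
        return EltSize;
      unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
      if (((EltSize == 16 && TypeSize == 32) ||
           (EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) &&
          TypeSize * NumElems == 128)
        return TypeSize;
      return EltSize;
    }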
@@ -222,7 +222,8 @@ public:
bool UseMaskForGaps = false);
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
bool VariableMask, unsigned Alignment,
const Instruction *I = nullptr);
bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
......
@@ -131,19 +131,23 @@ unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
}
unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF,
const Instruction *I) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
}
unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type*> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
const Instruction *I) {
if (ID == Intrinsic::bswap) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
return LT.first + 2;
}
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
ScalarizationCostPassed, I);
}
unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
@@ -209,9 +213,11 @@ unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
}
unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
Value *Ptr, bool VariableMask, unsigned Alignment) {
Value *Ptr, bool VariableMask,
unsigned Alignment,
const Instruction *I) {
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment);
Alignment, I);
}
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
......
@@ -106,10 +106,12 @@ public:
unsigned VF);
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type*> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX);
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
const Instruction *I = nullptr);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
@@ -120,7 +122,8 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
bool VariableMask, unsigned Alignment,
const Instruction *I);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace, bool UseMaskForCond = false,
......
@@ -936,17 +936,21 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF,
const Instruction *I) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
}
unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type*> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
const Instruction *I) {
if (ID == Intrinsic::bswap && ST->hasP9Vector())
return TLI->getTypeLegalizationCost(DL, RetTy).first;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed);
ScalarizationCostPassed, I);
}
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
......
@@ -111,10 +111,12 @@ public:
bool UseMaskForCond = false,
bool UseMaskForGaps = false);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF, const Instruction *I = nullptr);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type*> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX);
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
const Instruction *I = nullptr);
/// @}
};
......
@@ -1124,20 +1124,22 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF) {
FastMathFlags FMF, unsigned VF,
const Instruction *I) {
int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
}
int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed) {
unsigned ScalarizationCostPassed,
const Instruction *I) {
int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
FMF, ScalarizationCostPassed);
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed, I);
}
@@ -101,10 +101,11 @@ public:
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,