Commit e3056ae9 authored by Sam Parker
Browse files

[NFC][TTI] Explicit use of VectorType

The API for shuffles and reductions takes generic Type parameters
instead of VectorType, so implementations need many assertions and casts.
This patch makes those types explicit, which means that clients can no
longer be lazy about the types they pass, but it results in less
ambiguity, and that can only be a good thing.

Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=45562

Differential Revision: https://reviews.llvm.org/D78357
parent a8e15ee0
......@@ -910,8 +910,8 @@ public:
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
Type *SubTp = nullptr) const;
int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
VectorType *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
......@@ -989,10 +989,10 @@ public:
/// Split:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) const;
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
bool IsUnsigned) const;
int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm, bool IsUnsigned) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
......@@ -1332,8 +1332,8 @@ public:
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
......@@ -1356,9 +1356,9 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace, bool UseMaskForCond = false,
bool UseMaskForGaps = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) = 0;
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm, bool IsUnsigned) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
......@@ -1731,8 +1731,8 @@ public:
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) override {
int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
......@@ -1775,12 +1775,12 @@ public:
Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps);
}
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) override {
return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
}
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
bool IsUnsigned) override {
int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm, bool IsUnsigned) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
......
......@@ -438,8 +438,8 @@ public:
return 1;
}
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
Type *SubTp) {
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
VectorType *SubTp) {
return 1;
}
......@@ -512,9 +512,9 @@ public:
return 0;
}
unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
......
......@@ -80,8 +80,7 @@ private:
/// Estimate a cost of Broadcast as an extract and sequence of insert
/// operations.
unsigned getBroadcastShuffleOverhead(Type *Ty) {
auto *VTy = cast<VectorType>(Ty);
unsigned getBroadcastShuffleOverhead(VectorType *VTy) {
unsigned Cost = 0;
// Broadcast cost is equal to the cost of extracting the zero'th element
// plus the cost of inserting it into every element of the result vector.
......@@ -97,8 +96,7 @@ private:
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
unsigned getPermuteShuffleOverhead(Type *Ty) {
auto *VTy = cast<VectorType>(Ty);
unsigned getPermuteShuffleOverhead(VectorType *VTy) {
unsigned Cost = 0;
// Shuffle cost is equal to the cost of extracting each element from its
// argument plus the cost of inserting them into the result vector.
......@@ -118,11 +116,10 @@ private:
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
VectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
auto *VTy = cast<VectorType>(Ty);
auto *SubVTy = cast<VectorType>(SubTy);
int NumSubElts = SubVTy->getNumElements();
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
"SK_ExtractSubvector index out of range");
......@@ -142,11 +139,10 @@ private:
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
VectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
auto *VTy = cast<VectorType>(Ty);
auto *SubVTy = cast<VectorType>(SubTy);
int NumSubElts = SubVTy->getNumElements();
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
"SK_InsertSubvector index out of range");
......@@ -683,8 +679,8 @@ public:
return OpCost;
}
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp) {
switch (Kind) {
case TTI::SK_Broadcast:
return getBroadcastShuffleOverhead(Tp);
......@@ -1198,6 +1194,7 @@ public:
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
const Instruction *I = nullptr) {
auto *ConcreteTTI = static_cast<T *>(this);
auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
......@@ -1320,28 +1317,28 @@ public:
case Intrinsic::masked_load:
return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::experimental_vector_reduce_add:
return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_mul:
return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_and:
return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_or:
return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_xor:
return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_v2_fadd:
return ConcreteTTI->getArithmeticReductionCost(
Instruction::FAdd, Tys[0],
Instruction::FAdd, VecOpTy,
/*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
// reductions.
case Intrinsic::experimental_vector_reduce_v2_fmul:
return ConcreteTTI->getArithmeticReductionCost(
Instruction::FMul, Tys[0],
Instruction::FMul, VecOpTy,
/*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
// reductions.
case Intrinsic::experimental_vector_reduce_smax:
......@@ -1349,12 +1346,14 @@ public:
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsPairwiseForm=*/false,
/*IsUnsigned=*/false);
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsPairwiseForm=*/false,
/*IsUnsigned=*/true);
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
......@@ -1639,11 +1638,10 @@ public:
///
/// The cost model should take into account that the actual length of the
/// vector is reduced on each iteration.
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");
Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
Type *ScalarTy = Ty->getElementType();
unsigned NumVecElts = Ty->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned ArithCost = 0;
unsigned ShuffleCost = 0;
......@@ -1655,7 +1653,7 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
......@@ -1689,12 +1687,11 @@ public:
/// Try to calculate op costs for min/max reduction operations.
/// \param CondTy Conditional type for the Select instruction.
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
bool) {
assert(Ty->isVectorTy() && "Expect a vector type");
Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
Type *ScalarCondTy = cast<VectorType>(CondTy)->getElementType();
unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwise, bool) {
Type *ScalarTy = Ty->getElementType();
Type *ScalarCondTy = CondTy->getElementType();
unsigned NumVecElts = Ty->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned CmpOpcode;
if (Ty->isFPOrFPVectorTy()) {
......@@ -1714,7 +1711,7 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
CondTy = VectorType::get(ScalarCondTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
......
......@@ -599,8 +599,8 @@ int TargetTransformInfo::getArithmeticInstrCost(
return Cost;
}
int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
Type *SubTp) const {
int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
int Index, VectorType *SubTp) const {
int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
......@@ -732,14 +732,16 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
return Cost;
}
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
VectorType *Ty,
bool IsPairwiseForm) const {
int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
VectorType *CondTy,
bool IsPairwiseForm,
bool IsUnsigned) const {
int Cost =
......@@ -1011,7 +1013,8 @@ static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
}
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
unsigned &Opcode, Type *&Ty) {
unsigned &Opcode,
VectorType *&Ty) {
if (!EnableReduxCost)
return RK_None;
......@@ -1076,7 +1079,7 @@ getShuffleAndOtherOprd(Value *L, Value *R) {
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
unsigned &Opcode, Type *&Ty) {
unsigned &Opcode, VectorType *&Ty) {
if (!EnableReduxCost)
return RK_None;
......@@ -1249,7 +1252,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
// Try to match a reduction sequence (series of shufflevector and vector
// adds followed by an extractelement).
unsigned ReduxOpCode;
Type *ReduxType;
VectorType *ReduxType;
switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
case RK_Arithmetic:
......@@ -1257,11 +1260,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
/*IsPairwiseForm=*/false);
case RK_MinMax:
return getMinMaxReductionCost(
ReduxType, CmpInst::makeCmpResultType(ReduxType),
ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
case RK_UnsignedMinMax:
return getMinMaxReductionCost(
ReduxType, CmpInst::makeCmpResultType(ReduxType),
ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
case RK_None:
break;
......@@ -1273,11 +1276,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
/*IsPairwiseForm=*/true);
case RK_MinMax:
return getMinMaxReductionCost(
ReduxType, CmpInst::makeCmpResultType(ReduxType),
ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
case RK_UnsignedMinMax:
return getMinMaxReductionCost(
ReduxType, CmpInst::makeCmpResultType(ReduxType),
ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
case RK_None:
break;
......@@ -1298,8 +1301,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return 0; // Model all ExtractValue nodes as free.
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
Type *Ty = Shuffle->getType();
Type *SrcTy = Shuffle->getOperand(0)->getType();
auto *Ty = cast<VectorType>(Shuffle->getType());
auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
// TODO: Identify and add costs for insert subvector, etc.
int SubIndex;
......
......@@ -930,7 +930,8 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return false;
}
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
VectorType *ValTy,
bool IsPairwiseForm) {
if (IsPairwiseForm)
......@@ -958,8 +959,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
int Index, VectorType *SubTp) {
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
......
......@@ -224,10 +224,11 @@ public:
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
/// @}
};
......
......@@ -620,8 +620,8 @@ unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
}
}
int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwise) {
int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwise) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions (which support
......@@ -635,7 +635,7 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
return LT.first * getFullRateInstrCost();
}
int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwise,
bool IsUnsigned) {
EVT OrigTy = TLI->getValueType(DL, Ty);
......@@ -899,10 +899,9 @@ bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
}
}
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
int Index, VectorType *SubTp) {
if (ST->hasVOP3PInsts()) {
VectorType *VT = cast<VectorType>(Tp);
if (VT->getNumElements() == 2 &&
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
// With op_sel, VOP3P instructions can freely access the low half or high
......@@ -919,7 +918,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
return BaseT::getShuffleCost(Kind, VT, Index, SubTp);
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
......@@ -986,8 +985,8 @@ unsigned GCNTTIImpl::getUserCost(const User *U,
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
Type *Ty = Shuffle->getType();
Type *SrcTy = Shuffle->getOperand(0)->getType();
auto *Ty = cast<VectorType>(Shuffle->getType());
auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
// TODO: Identify and add costs for insert subvector, etc.
int SubIndex;
......
......@@ -215,8 +215,8 @@ public:
unsigned getVectorSplitCost() { return 0; }
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
......@@ -226,7 +226,7 @@ public:
int getInlinerVectorBonusPercent() { return 0; }
int getArithmeticReductionCost(unsigned Opcode,
Type *Ty,
VectorType *Ty,
bool IsPairwise);
template <typename T>
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
......@@ -239,7 +239,7 @@ public:
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1, const Instruction *I = nullptr);
int getMinMaxReductionCost(Type *Ty, Type *CondTy,
int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
......
......@@ -587,8 +587,8 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
return LibCallCost;
}
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
int Index, VectorType *SubTp) {
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
......
......@@ -164,7 +164,8 @@ public:
int getMemcpyCost(const Instruction *I);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
......
......@@ -526,9 +526,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
assert (Tp->isVectorTy());
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
int Index, VectorType *SubTp) {
if (ST->hasVector()) {
unsigned NumVectors = getNumVectorRegs(Tp);
......
......@@ -81,7 +81,8 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
VectorType *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
......
......@@ -925,12 +925,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
Type *SubTp) {
auto *Tp = cast<VectorType>(BaseTp);
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
int Index, VectorType *SubTp) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
if (Kind == TTI::SK_Transpose)
......@@ -965,13 +964,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
LT.second.getVectorElementType() ==
SubLT.second.getVectorElementType() &&
LT.second.getVectorElementType().getSizeInBits() ==
Tp->getElementType()->getPrimitiveSizeInBits()) {
BaseTp->getElementType()->getPrimitiveSizeInBits()) {
assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
"Unexpected number of elements!");
Type *VecTy = VectorType::get(Tp->getElementType(),
LT.second.getVectorNumElements());
Type *SubTy = VectorType::get(Tp->getElementType(),
SubLT.second.getVectorNumElements());
VectorType *VecTy = VectorType::get(BaseTp->getElementType(),
LT.second.getVectorNumElements());
VectorType *SubTy =
VectorType::get(BaseTp->getElementType(),
SubLT.second.getVectorNumElements());
int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
ExtractIndex, SubTy);
......@@ -991,7 +991,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
// Handle some common (illegal) sub-vector types as they are often very cheap
// to shuffle even on targets without PSHUFB.
EVT VT = TLI->getValueType(DL, Tp);
EVT VT = TLI->getValueType(DL, BaseTp);
if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
!ST->hasSSSE3()) {
static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
......@@ -1032,25 +1032,26 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
MVT LegalVT = LT.second;
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
Tp->getElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() < Tp->getNumElements()) {
BaseTp->getElementType()->getPrimitiveSizeInBits() &&