[RISCV][CostModel] VPIntrinsics have same cost as their non-vp counterparts (#67178) (fc865c20) · Commits · llvm-doe / llvm-project

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+56 −0

Original line number	Diff line number	Diff line
		@@ -1686,6 +1686,62 @@ public:
		}
		}

		// VP Intrinsics should have the same cost as their non-vp counterpart.
		// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
		// counterpart when the vector length argument is smaller than the maximum
		// vector length.
		if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
		std::optional<unsigned> FOp =
		VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
		if (FOp) {
		// TODO: Support other kinds of intrinsics (e.g. reductions)
		if (ICA.getID() == Intrinsic::vp_load) {
		Align Alignment;
		if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
		Alignment = VPI->getPointerAlignment().valueOrOne();
		unsigned AS = 0;
		if (ICA.getArgs().size() > 1)
		if (auto *PtrTy =
		dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
		AS = PtrTy->getAddressSpace();
		return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
		AS, CostKind);
		}
		if (ICA.getID() == Intrinsic::vp_store) {
		Align Alignment;
		if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
		Alignment = VPI->getPointerAlignment().valueOrOne();
		unsigned AS = 0;
		if (ICA.getArgs().size() >= 2)
		if (auto *PtrTy =
		dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
		AS = PtrTy->getAddressSpace();
		return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
		AS, CostKind);
		}
		if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
		return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
		CostKind);
		}
		}

		std::optional<Intrinsic::ID> FID =
		VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
		if (FID) {
		// The non-vp version has the same Args/Tys minus the trailing mask and vector length.
		assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
		"Expected VPIntrinsic to have Mask and Vector Length args and "
		"types");
		ArrayRef<const Value *> NewArgs = ArrayRef(ICA.getArgs()).drop_back(2);
		ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

		IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewArgs,
		NewTys, ICA.getFlags(), ICA.getInst(),
		ICA.getScalarizationCost());
		return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
		}
		}

		// Assume that we need to scalarize this intrinsic.
		// Compute the scalarization overhead based on Args for a vector
		// intrinsic.

llvm/test/Analysis/CostModel/RISCV/gep.ll

+4 −4

Original line number	Diff line number	Diff line
		@@ -270,7 +270,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
		@@ -282,7 +282,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		@@ -340,7 +340,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 0
		@@ -352,7 +352,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
		; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
		; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void

llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll

+369 −1

File changed.

Preview size limit exceeded, changes collapsed.

Admin message