[ARM,MVE] Intrinsics for partial-overwrite imm shifts. (31004809) · Commits · llvm-doe / llvm-project

clang/include/clang/Basic/arm_mve.td

+37 −0

Original line number	Diff line number	Diff line
		@@ -651,6 +651,43 @@ multiclass vshll_imm<int top> {
		defm vshllbq : vshll_imm<0>;
		defm vshlltq : vshll_imm<1>;

		// DyadicImmShift defines the unpredicated (q_n) and predicated (q_m_n)
		// intrinsics for a shift that takes two vector operands, $a and $b, plus
		// an immediate shift count $sh.
		//   outtype   - type of the result and of the first operand $a.
		//   imm       - Immediate describing the permitted range of $sh.
		//   intname   - base name of the IR intrinsic to call; defaults to NAME.
		//   extraargs - extra operands appended after $sh in the IR intrinsic call.
		multiclass DyadicImmShift<Type outtype, Immediate imm, string intname = NAME,
		dag extraargs = (?)> {
		// The IR intrinsic is overloaded on [Vector] alone when outtype is Vector,
		// and on [outtype, Vector] otherwise. The two types are compared by
		// casting both to strings.
		foreach intparams = [!if(!eq(!cast<string>(outtype), !cast<string>(Vector)),
		[Vector], [outtype, Vector])] in {
		// Unpredicated form.
		def q_n: Intrinsic<
		outtype, (args outtype:$a, Vector:$b, imm:$sh),
		!con((IRInt<intname, intparams> $a, $b, $sh), extraargs)>;

		// Predicated form: calls the "_predicated" IR intrinsic, adds Predicate
		// to the overload list, and passes $pred after the extra operands.
		def q_m_n: Intrinsic<
		outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred),
		!con((IRInt<intname # "_predicated", intparams # [Predicate]>
		$a, $b, $sh), extraargs, (? $pred))>;
		}
		}

		// VSHRN instantiates DyadicImmShift twice against the shared "vshrn" IR
		// intrinsic. The 'b' variant appends 0 and the 't' variant appends 1 as the
		// final extra operand, which is the top-half flag of int_arm_mve_vshrn.
		multiclass VSHRN<Type outtype, Immediate imm, dag extraargs> {
		defm b: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? 0))>;
		defm t: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? 1))>;
		}

		// Narrowing right shifts whose output signedness follows the input.
		// The four extra operands are, in order: saturate, round, unsigned-out and
		// unsigned-in. U is (unsignedflag Scalar) and is passed for both of the
		// signedness operands.
		let params = [s16, s32, u16, u32], pnt = PNT_NType in {
		foreach U = [(unsignedflag Scalar)] in {
		defm vshrn : VSHRN<HalfVector, imm_1toHalfN, (? 0,0,U,U)>;
		defm vqshrn : VSHRN<HalfVector, imm_1toHalfN, (? 1,0,U,U)>;
		defm vrshrn : VSHRN<HalfVector, imm_1toHalfN, (? 0,1,U,U)>;
		defm vqrshrn : VSHRN<HalfVector, imm_1toHalfN, (? 1,1,U,U)>;
		}
		}
		// Saturating narrowing right shifts from signed input to unsigned output.
		// The four extra operands are, in order: saturate, round, unsigned-out and
		// unsigned-in (VSHRN appends the final top-half flag itself).
		let params = [s16, s32], pnt = PNT_NType in {
		defm vqshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,0,1,0)>;
		// Rounding variant: the round flag (second operand) must be 1, otherwise
		// this would generate exactly the same IR as vqshrun.
		defm vqrshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,1,1,0)>;
		}
		// Shift-and-insert intrinsics on all integer vector types. Result and both
		// operands have type Vector.
		let params = T.Int, pnt = PNT_NType in {
		// Shift-left-insert takes a shift count of 0 to N-1, so it uses
		// imm_0toNm1 rather than the 1-to-N range of the right shifts.
		defm vsli : DyadicImmShift<Vector, imm_0toNm1>;
		// Shift-right-insert takes a shift count of 1 to N.
		defm vsri : DyadicImmShift<Vector, imm_1toN>;
		}

		// Base class for the scalar shift intrinsics.
		class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
		Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {

clang/include/clang/Basic/arm_mve_defs.td

+19 −5

Original line number	Diff line number	Diff line
		@@ -190,7 +190,10 @@ def CTO_Pred: ComplexTypeOp;
		class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
		class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
		def CTO_CopyKind: ComplexTypeOp;
		def CTO_DoubleSize: ComplexTypeOp;
		// CTO_ScaleSize<num, denom> maps a scalar type to the scalar type of the
		// same kind whose size in bits is the original size times num/denom.
		class CTO_ScaleSize<int num_, int denom_>: ComplexTypeOp {
		int num = num_;
		int denom = denom_;
		}

		// -----------------------------------------------------------------------------
		// Instances of Type intended to be used directly in the specification of an
		@@ -268,7 +271,8 @@ class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;
		// DoubleSize<k> expects k to be a scalar type. It returns a scalar type
		// whose kind (signed, unsigned or float) matches that of k, and whose size
		// is double that of k, if possible.
		class DoubleSize<Type k>: ComplexType<(CTO_DoubleSize k)>;
		class DoubleSize<Type k> : ComplexType<(CTO_ScaleSize<2, 1> k)>;
		// HalfSize<k> is the counterpart of DoubleSize: it returns a scalar type of
		// the same kind as k and half its size.
		class HalfSize<Type k> : ComplexType<(CTO_ScaleSize<1, 2> k)>;

		// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
		// integer scalar of the same size. So it returns u16 if you give it s16 or
		@@ -280,9 +284,12 @@ class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;
		def UScalar: Unsigned<Scalar>;
		def UVector: VecOf<UScalar>;

		// DblVector expands to a vector of scalars of size twice the size of
		// Scalar.
		// DblVector expands to a vector of scalars of size twice the size of Scalar.
		// HalfVector, similarly, expands to a vector of half-sized scalars. And
		// UHalfVector is a vector of half-sized _unsigned integers_.
		def DblVector: VecOf<DoubleSize<Scalar>>;
		def HalfVector: VecOf<HalfSize<Scalar>>;
		def UHalfVector: VecOf<Unsigned<HalfSize<Scalar>>>;

		// Expands to the 32-bit integer of the same signedness as Scalar.
		def Scalar32: CopyKind<u32, Scalar>;
		@@ -305,7 +312,10 @@ class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
		}
		def IB_UEltValue : ImmediateBounds;
		def IB_LaneIndex : ImmediateBounds;
		class IB_EltBit<int base_> : ImmediateBounds { int base = base_; }
		// IB_EltBit<base, type>: the immediate must lie in the inclusive range
		// base .. base + (size of 'type' in bits) - 1. 'type' defaults to Scalar
		// and is resolved against the intrinsic's type parameter.
		class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
		int base = base_;
		Type type = type_;
		}

		// -----------------------------------------------------------------------------
		// End-user definitions for immediate arguments.
		@@ -327,8 +337,12 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
		//
		// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
		// inclusive.
		//
		// imm_1toHalfN is like imm_1toN, but applied to a half-width type.
		// (So if Scalar is s16, for example, it'll give you the range 1 to 8.)
		def imm_1toN : Immediate<sint, IB_EltBit<1>>;
		def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
		def imm_1toHalfN : Immediate<sint, IB_EltBit<1, HalfSize<Scalar>>>;

		// imm_lane has to be the index of a vector lane in the main vector type, i.e.
		// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)

clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c

0 → 100644

+1565 −0

File added.

Preview size limit exceeded, changes collapsed.

clang/utils/TableGen/MveEmitter.cpp

+7 −4

Original line number	Diff line number	Diff line
		@@ -1099,14 +1099,16 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
		PrintFatalError("Cannot find a type to satisfy CopyKind");
		}

		if (Op->getName() == "CTO_DoubleSize") {
		if (Op->isSubClassOf("CTO_ScaleSize")) {
		const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(0), Param));
		int Num = Op->getValueAsInt("num"), Denom = Op->getValueAsInt("denom");
		unsigned DesiredSize = STKind->sizeInBits() * Num / Denom;
		for (const auto &kv : ScalarTypes) {
		const ScalarType *RT = kv.second.get();
		if (RT->kind() == STKind->kind() && RT->sizeInBits() == 2*STKind->sizeInBits())
		if (RT->kind() == STKind->kind() && RT->sizeInBits() == DesiredSize)
		return RT;
		}
		PrintFatalError("Cannot find a type to satisfy DoubleSize");
		PrintFatalError("Cannot find a type to satisfy ScaleSize");
		}

		PrintFatalError("Bad operator in type dag expression");
		@@ -1338,7 +1340,8 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
		} else if (Bounds->isSubClassOf("IB_EltBit")) {
		IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
		IA.i1 = Bounds->getValueAsInt("base");
		IA.i2 = IA.i1 + Param->sizeInBits() - 1;
		const Type *T = ME.getType(Bounds->getValueAsDef("type"), Param);
		IA.i2 = IA.i1 + T->sizeInBits() - 1;
		} else {
		PrintFatalError("unrecognised ImmediateBounds subclass");
		}

llvm/include/llvm/IR/IntrinsicsARM.td

+11 −0

Original line number	Diff line number	Diff line
		@@ -944,6 +944,17 @@ defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
		[llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
		llvm_i32_ty /*top-half*/]>;

		// Shift-and-insert intrinsics: one overloaded vector type is shared by the
		// result and both vector operands; the trailing i32 operand is presumably
		// the immediate shift count.
		defm int_arm_mve_vsli: MVEPredicated<
		[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
		defm int_arm_mve_vsri: MVEPredicated<
		[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;

		// Narrowing right shift. The result type is overloaded and also used for
		// the first operand; the second operand is a separately overloaded vector
		// type. The remaining i32 operands are labelled inline.
		defm int_arm_mve_vshrn: MVEPredicated<
		[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
		llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
		llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
		llvm_i32_ty /*top-half*/]>;

		// MVE scalar shifts.
		class ARM_MVE_qrshift_single<list<LLVMType> value,
		list<LLVMType> saturate = []> :