Commit 6c3fee47 authored by Simon Tatham's avatar Simon Tatham
Browse files

[ARM,MVE] Add intrinsics for gather/scatter load/stores.

This patch adds two new families of intrinsics, both of which are
memory accesses taking a vector of locations to load from / store to.

The vldrq_gather_base / vstrq_scatter_base intrinsics take a vector of
base addresses, and an immediate offset to be added consistently to
each one. vldrq_gather_offset / vstrq_scatter_offset take a scalar
base address, and a vector of offsets to add to it. The
'shifted_offset' variants also multiply each offset by the size of the
memory element type, so that the offset vector effectively holds array indices.

At the IR level, these operations are represented by a single set of
four IR intrinsics: {gather,scatter} × {base,offset}. The other
details (signed/unsigned, shift, and memory element size as opposed to
vector element size) are all specified by IR intrinsic polymorphism
and immediate operands, because that made the selection job easier
than making a huge family of similarly named intrinsics.

I considered using the standard IR representations such as
llvm.masked.gather, but they're not a good fit. In order to use
llvm.masked.gather to represent a gather_offset load with element size
smaller than a pointer, you'd have to expand the <8 x i16> vector of
offsets into an <8 x i16*> vector of pointers, which would be split up
during legalization, so you'd spend most of your time undoing the mess
it had made. Also, ISel support for llvm.masked.gather would be easy
enough in a trivial way (you can expand it into a gather-base load
with a zero immediate offset), but instruction-selecting lots of
fiddly idioms back into all the _other_ MVE load instructions would be
much more work. So I think dedicated IR intrinsics are the more
sensible approach, at least for the moment.

On the clang tablegen side, I've added two new features to the
Tablegen source accepted by MveEmitter: a 'CopyKind' type node for
defining a type that varies with the parameter type (it lets you ask
for an unsigned integer type of the same width as the parameter), and
an 'unsignedflag' value node for passing an immediate IR operand which
is 0 for a signed integer type or 1 for an unsigned one. That lets me
write each kind of intrinsic just once and get all its subtypes and
immediate arguments generated automatically.

Also I've tweaked the handling of pointer-typed values in the code
generation part of MveEmitter: they're generated as Address rather
than Value (i.e. including an alignment) so that they can be given to
the ordinary IR load and store operations, but I'd omitted the code to
convert them back to Value when they're going to be used as an
argument to an IR intrinsic.

On the MC side, I've enhanced MVEVectorVTInfo so that it can tell you
not only the full assembly-language suffix for a given vector type
(like 's32' or 'u16') but also the numeric-only one used by store
instructions (just '32' or '16').

Reviewers: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D69791
parent f0c6890f
Loading
Loading
Loading
Loading
+149 −13
Original line number Diff line number Diff line
@@ -72,22 +72,158 @@ def vcvt#half#q_m_f16: Intrinsic<

} // loop over half = "b", "t"

let params = T.All32, pnt = PNT_None in
def vldrwq_gather_base_wb: Intrinsic<
    Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<4>:$offset),
    (seq (IRInt<"vldr_gather_base_wb", [Vector, VecOf<Unsigned<Scalar>>]>
multiclass gather_base<list<Type> types, int size> {
  let params = types, pnt = PNT_None in {
    def _gather_base: Intrinsic<
      Vector, (args UVector:$addr, imm_mem7bit<size>:$offset),
      (IRInt<"vldr_gather_base", [Vector, UVector]> $addr, $offset)>;

    def _gather_base_z: Intrinsic<
      Vector, (args UVector:$addr, imm_mem7bit<size>:$offset, Predicate:$pred),
      (IRInt<"vldr_gather_base_predicated", [Vector, UVector, Predicate]>
          $addr, $offset, $pred)>;

    def _gather_base_wb: Intrinsic<
      Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset),
      (seq (IRInt<"vldr_gather_base_wb", [Vector, UVector]>
               (load $addr), $offset):$pair,
           (store (xval $pair, 1), $addr),
           (xval $pair, 0))>;

let params = T.All64, pnt = PNT_None in
def vldrdq_gather_base_wb_z: Intrinsic<
    Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<8>:$offset,
    def _gather_base_wb_z: Intrinsic<
      Vector, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
                    Predicate:$pred),
    (seq (IRInt<"vldr_gather_base_wb_predicated", [Vector, VecOf<Unsigned<Scalar>>, Predicate]>
      (seq (IRInt<"vldr_gather_base_wb_predicated",
                  [Vector, UVector, Predicate]>
               (load $addr), $offset, $pred):$pair,
           (store (xval $pair, 1), $addr),
           (xval $pair, 0))>;
  }
}

// Instantiate the gather-base loads: 32-bit element types use a 4-byte
// offset granule, 64-bit element types an 8-byte granule (the 'size'
// parameter of imm_mem7bit, checked by Sema as "multiple of 4/8").
defm vldrwq: gather_base<T.All32, 4>;
defm vldrdq: gather_base<T.All64, 8>;

// Scatter stores taking a vector of base addresses ($addr) plus a constant
// immediate byte offset added to every lane, storing the lanes of $data to
// the resulting locations. 'size' parameterizes the offset granule: the
// immediate is constrained by imm_mem7bit<size> to a multiple of 'size'
// in the range [0, 127*size].
multiclass scatter_base<list<Type> types, int size> {
  let params = types in {
    // Plain scatter-store.
    def _scatter_base: Intrinsic<
      Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data),
      (IRInt<"vstr_scatter_base", [UVector, Vector]> $addr, $offset, $data)>;

    // Predicated form (_p): an extra Predicate argument, forwarded to the
    // "..._predicated" IR intrinsic.
    def _scatter_base_p: Intrinsic<
      Void, (args UVector:$addr, imm_mem7bit<size>:$offset, Vector:$data,
                  Predicate:$pred),
      (IRInt<"vstr_scatter_base_predicated", [UVector, Vector, Predicate]>
          $addr, $offset, $data, $pred)>;

    // Write-back form (_wb): $addr is now a *pointer* to the base-address
    // vector. The vector is loaded from it, the IR intrinsic's return value
    // (the updated addresses, bound to $wbaddr) is stored back through it.
    def _scatter_base_wb: Intrinsic<
      Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset, Vector:$data),
      (seq (IRInt<"vstr_scatter_base_wb", [UVector, Vector]>
                 (load $addr), $offset, $data):$wbaddr,
           (store $wbaddr, $addr))>;

    // Predicated write-back form.
    def _scatter_base_wb_p: Intrinsic<
      Void, (args Ptr<UVector>:$addr, imm_mem7bit<size>:$offset,
                    Vector:$data, Predicate:$pred),
      (seq (IRInt<"vstr_scatter_base_wb_predicated",
                  [UVector, Vector, Predicate]>
               (load $addr), $offset, $data, $pred):$wbaddr,
           (store $wbaddr, $addr))>;
  }
}

// Instantiate the scatter-base stores: 4-byte offset granule for 32-bit
// element types, 8-byte granule for 64-bit element types.
defm vstrwq: scatter_base<T.All32, 4>;
defm vstrdq: scatter_base<T.All64, 8>;

// Gather loads taking a scalar base pointer and a vector of unsigned
// offsets that are added to it unscaled. 'memtype' is the type of each
// element in memory, which may be narrower than the vector element type;
// CPtr<CopyKind<memtype, Scalar>> is a const pointer to a scalar of
// memtype's width whose kind (signed/unsigned/float) matches Scalar.
// The trailing IR operands are the memory element size (memtype.size),
// a shift amount of 0, and (unsignedflag Scalar), which expands to 1 for
// an unsigned Scalar type and 0 otherwise.
multiclass gather_offset_unshifted<list<Type> types, PrimitiveType memtype> {
  let params = types in {
    def _gather_offset: Intrinsic<
      Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
      (IRInt<"vldr_gather_offset",
             [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
          $base, $offsets, memtype.size, 0, (unsignedflag Scalar))>;
    // Predicated variant (_z suffix): extra Predicate operand appended.
    def _gather_offset_z: Intrinsic<
      Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                    Predicate:$pred),
      (IRInt<"vldr_gather_offset_predicated",
             [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
          $base, $offsets, memtype.size, 0, (unsignedflag Scalar), $pred)>;
  }
}

// Shifted-offset gather loads: identical to gather_offset_unshifted except
// that the non-zero 'shift' value is passed as the shift-amount IR operand,
// so each offset is scaled by 2^shift before being added to the base.
// Uses the same underlying "vldr_gather_offset" IR intrinsics; only the
// immediate shift operand differs.
multiclass gather_offset_shifted<list<Type> types, PrimitiveType memtype,
                                 int shift> {
  let params = types in {
    def _gather_shifted_offset: Intrinsic<
      Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets),
      (IRInt<"vldr_gather_offset",
             [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector]>
          $base, $offsets, memtype.size, shift, (unsignedflag Scalar))>;
    // Predicated variant (_z suffix).
    def _gather_shifted_offset_z: Intrinsic<
      Vector, (args CPtr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                    Predicate:$pred),
      (IRInt<"vldr_gather_offset_predicated",
             [Vector, CPtr<CopyKind<memtype, Scalar>>, UVector, Predicate]>
          $base, $offsets, memtype.size, shift, (unsignedflag Scalar), $pred)>;
  }
}

// Convenience multiclass emitting both the unshifted and the shifted
// gather-offset families under the same name prefix (defm "").
multiclass gather_offset_both<list<Type> types, PrimitiveType memtype,
                              int shift> {
  defm "": gather_offset_unshifted<types, memtype>;
  defm "": gather_offset_shifted<types, memtype, shift>;
}

// Instantiate the gather-offset loads per memory element width. Byte loads
// get only the unshifted form; wider loads get both forms, with
// shift = log2(element size in bytes): 1 for u16, 2 for u32, 3 for u64.
// Widening loads are expressed by listing vector types wider than memtype
// (e.g. vldrbq over T.Int16/T.Int32).
defm vldrbq: gather_offset_unshifted<!listconcat(T.All8, T.Int16, T.Int32), u8>;
defm vldrhq: gather_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
defm vldrwq: gather_offset_both<T.All32, u32, 2>;
defm vldrdq: gather_offset_both<T.Int64, u64, 3>;

// Scatter stores taking a scalar base pointer, a vector of unsigned
// offsets (added to the base unscaled) and a data vector.
// Ptr<CopyKind<memtype, Scalar>> is a (non-const, since we store) pointer
// to a scalar of memtype's width whose kind matches Scalar. The trailing
// IR operands are the memory element size (memtype.size) and a shift
// amount of 0. Note: unlike the gather loads, no unsignedflag operand —
// stores need no sign/zero-extension distinction.
multiclass scatter_offset_unshifted<list<Type> types, PrimitiveType memtype> {
  let params = types in {
    def _scatter_offset: Intrinsic<
      Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                  Vector:$data),
      (IRInt<"vstr_scatter_offset",
             [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
          $base, $offsets, $data, memtype.size, 0)>;
    // Predicated variant (_p suffix): extra Predicate operand appended.
    def _scatter_offset_p: Intrinsic<
      Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                  Vector:$data, Predicate:$pred),
      (IRInt<"vstr_scatter_offset_predicated",
             [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
          $base, $offsets, $data, memtype.size, 0, $pred)>;
  }
}

// Shifted-offset scatter stores: identical to scatter_offset_unshifted
// except that the non-zero 'shift' value is passed as the shift-amount IR
// operand, so each offset is scaled by 2^shift before being added to the
// base. Same underlying "vstr_scatter_offset" IR intrinsics.
multiclass scatter_offset_shifted<list<Type> types, PrimitiveType memtype,
                                  int shift> {
  let params = types in {
    def _scatter_shifted_offset: Intrinsic<
      Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                  Vector:$data),
      (IRInt<"vstr_scatter_offset",
             [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector]>
          $base, $offsets, $data, memtype.size, shift)>;
    // Predicated variant (_p suffix).
    def _scatter_shifted_offset_p: Intrinsic<
      Void, (args Ptr<CopyKind<memtype, Scalar>>:$base, UVector:$offsets,
                  Vector:$data, Predicate:$pred),
      (IRInt<"vstr_scatter_offset_predicated",
             [Ptr<CopyKind<memtype, Scalar>>, UVector, Vector, Predicate]>
          $base, $offsets, $data, memtype.size, shift, $pred)>;
  }
}

// Convenience multiclass emitting both the unshifted and the shifted
// scatter-offset families under the same name prefix (defm "").
multiclass scatter_offset_both<list<Type> types, PrimitiveType memtype,
                               int shift> {
  defm "": scatter_offset_unshifted<types, memtype>;
  defm "": scatter_offset_shifted<types, memtype, shift>;
}

// Instantiate the scatter-offset stores per memory element width,
// mirroring the gather-offset instantiations above: byte stores are
// unshifted only; wider stores get both forms with shift = log2(element
// size in bytes). Narrowing stores are expressed by listing vector types
// wider than memtype (e.g. vstrbq over T.Int16/T.Int32).
defm vstrbq: scatter_offset_unshifted<!listconcat(T.All8,T.Int16,T.Int32), u8>;
defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;

let params = [Void], pnt = PNT_None in
def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),
+20 −5
Original line number Diff line number Diff line
@@ -82,6 +82,11 @@ class IRInt<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
// the return value of the seq construction as a whole.
def seq;

// Another magic operation is 'unsignedflag', which you give a scalar
// _type_ as an argument, and it expands into 1 for an unsigned type
// and 0 for a signed (or floating) one.
def unsignedflag;

// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
// indicates that the IR generation for that intrinsic is done by handwritten
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
@@ -109,7 +114,7 @@ def CTO_Vec: ComplexTypeOp;
def CTO_Pred: ComplexTypeOp;
class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; }
def CTO_CopyKind: ComplexTypeOp;

// -----------------------------------------------------------------------------
// Instances of Type intended to be used directly in the specification of an
@@ -167,10 +172,20 @@ class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;
class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;
class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;

// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer
// scalar of the same size. So it returns u16 if you give it s16 or f16 (or
// u16 itself).
class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>;
// CopyKind<s,k> expects s and k to be scalar types. It returns a scalar type
// whose kind (signed, unsigned or float) matches that of k, and whose size
// matches that of s.
class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;

// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
// integer scalar of the same size. So it returns u16 if you give it s16 or
// f16 (or u16 itself).
class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;

// UScalar and UVector expand to the unsigned-integer versions of
// Scalar and Vector.
def UScalar: Unsigned<Scalar>;
def UVector: VecOf<UScalar>;

// -----------------------------------------------------------------------------
// Internal definitions for specifying immediate arguments for an intrinsic.
+2146 −0

File added.

Preview size limit exceeded, changes collapsed.

+56 −0
Original line number Diff line number Diff line
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s

#include <arm_mve.h>

// Sema test for the immediate-offset argument of the gather-base load and
// scatter-base store intrinsics: for 64-bit element types the offset must
// be a multiple of 8 in [0, 1016] (i.e. 127*8); for 32-bit element types a
// multiple of 4 in [0, 508] (i.e. 127*4). The "expected-error" comments
// are directives consumed by clang's -verify mode and must remain on the
// same line as the offending call.
void test_load_offsets(uint32x4_t addr32, uint64x2_t addr64)
{
  // Offsets that should be a multiple of 8 times 0,1,...,127
  vldrdq_gather_base_s64(addr64, 0);
  vldrdq_gather_base_s64(addr64, 8);
  vldrdq_gather_base_s64(addr64, 2*8);
  vldrdq_gather_base_s64(addr64, 125*8);
  vldrdq_gather_base_s64(addr64, 126*8);
  vldrdq_gather_base_s64(addr64, 127*8);
  vldrdq_gather_base_s64(addr64, -8); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
  vldrdq_gather_base_s64(addr64, 128*8); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
  vldrdq_gather_base_s64(addr64, 4); // expected-error {{argument should be a multiple of 8}}
  vldrdq_gather_base_s64(addr64, 1); // expected-error {{argument should be a multiple of 8}}

  // Offsets that should be a multiple of 4 times 0,1,...,127
  vldrwq_gather_base_s32(addr32, 0);
  vldrwq_gather_base_s32(addr32, 4);
  vldrwq_gather_base_s32(addr32, 2*4);
  vldrwq_gather_base_s32(addr32, 125*4);
  vldrwq_gather_base_s32(addr32, 126*4);
  vldrwq_gather_base_s32(addr32, 127*4);
  vldrwq_gather_base_s32(addr32, -4); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
  vldrwq_gather_base_s32(addr32, 128*4); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
  vldrwq_gather_base_s32(addr32, 2); // expected-error {{argument should be a multiple of 4}}
  vldrwq_gather_base_s32(addr32, 1); // expected-error {{argument should be a multiple of 4}}

  // Show that the polymorphic store intrinsics get the right set of
  // error checks after overload resolution. These ones expand to the
  // 8-byte granular versions...
  vstrdq_scatter_base(addr64, 0, addr64);
  vstrdq_scatter_base(addr64, 8, addr64);
  vstrdq_scatter_base(addr64, 2*8, addr64);
  vstrdq_scatter_base(addr64, 125*8, addr64);
  vstrdq_scatter_base(addr64, 126*8, addr64);
  vstrdq_scatter_base(addr64, 127*8, addr64);
  vstrdq_scatter_base(addr64, -8, addr64); // expected-error {{argument value -8 is outside the valid range [0, 1016]}}
  vstrdq_scatter_base(addr64, 128*8, addr64); // expected-error {{argument value 1024 is outside the valid range [0, 1016]}}
  vstrdq_scatter_base(addr64, 4, addr64); // expected-error {{argument should be a multiple of 8}}
  vstrdq_scatter_base(addr64, 1, addr64); // expected-error {{argument should be a multiple of 8}}

  // ... and these ones expand to the 4-byte granular versions.
  vstrwq_scatter_base(addr32, 0, addr32);
  vstrwq_scatter_base(addr32, 4, addr32);
  vstrwq_scatter_base(addr32, 2*4, addr32);
  vstrwq_scatter_base(addr32, 125*4, addr32);
  vstrwq_scatter_base(addr32, 126*4, addr32);
  vstrwq_scatter_base(addr32, 127*4, addr32);
  vstrwq_scatter_base(addr32, -4, addr32); // expected-error {{argument value -4 is outside the valid range [0, 508]}}
  vstrwq_scatter_base(addr32, 128*4, addr32); // expected-error {{argument value 512 is outside the valid range [0, 508]}}
  vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}}
  vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}}
}
+32 −8
Original line number Diff line number Diff line
@@ -204,6 +204,9 @@ public:
      Name = "const " + Name;
    return Name + " *";
  }
  std::string llvmName() const override {
    return "llvm::PointerType::getUnqual(" + Pointee->llvmName() + ")";
  }

  static bool classof(const Type *T) {
    return T->typeKind() == TypeKind::Pointer;
@@ -512,6 +515,11 @@ public:
  void setVarname(const StringRef s) { VarName = s; }
  bool varnameUsed() const { return VarNameUsed; }

  // Emit code to generate this result as a Value *.
  virtual std::string asValue() {
    return varname();
  }

  // Code generation happens in multiple passes. This method tracks whether a
  // Result has yet been visited in a given pass, without the need for a
  // tedious loop in between passes that goes through and resets a 'visited'
@@ -547,6 +555,12 @@ public:
  std::string typeName() const override {
    return AddressType ? "Address" : Result::typeName();
  }
  // Emit code to generate this result as a Value *.
  std::string asValue() override {
    if (AddressType)
      return "(" + varname() + ".getPointer())";
    return Result::asValue();
  }
};

// Result subclass for an integer literal appearing in Tablegen. This may need
@@ -665,7 +679,7 @@ public:
    OS << "), llvm::SmallVector<Value *, " << Args.size() << "> {";
    const char *Sep = "";
    for (auto Arg : Args) {
      OS << Sep << Arg->varname();
      OS << Sep << Arg->asValue();
      Sep = ", ";
    }
    OS << "})";
@@ -974,17 +988,15 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
    return getPointerType(Pointee, Op->getValueAsBit("const"));
  }

  if (Op->isSubClassOf("CTO_Sign")) {
    const ScalarType *ST = cast<ScalarType>(getType(D->getArg(0), Param));
    ScalarTypeKind NewKind = Op->getValueAsBit("signed")
                                 ? ScalarTypeKind::SignedInt
                                 : ScalarTypeKind::UnsignedInt;
  if (Op->getName() == "CTO_CopyKind") {
    const ScalarType *STSize = cast<ScalarType>(getType(D->getArg(0), Param));
    const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(1), Param));
    for (const auto &kv : ScalarTypes) {
      const ScalarType *RT = kv.second.get();
      if (RT->kind() == NewKind && RT->sizeInBits() == ST->sizeInBits())
      if (RT->kind() == STKind->kind() && RT->sizeInBits() == STSize->sizeInBits())
        return RT;
    }
    PrintFatalError("Cannot change sign of this type");
    PrintFatalError("Cannot find a type to satisfy CopyKind");
  }

  PrintFatalError("Bad operator in type dag expression");
@@ -1025,6 +1037,18 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
      }
    }
    PrintFatalError("Unsupported type cast");
  } else if (Op->getName() == "unsignedflag") {
    if (D->getNumArgs() != 1)
      PrintFatalError("unsignedflag should have exactly one argument");
    Record *TypeRec = cast<DefInit>(D->getArg(0))->getDef();
    if (!TypeRec->isSubClassOf("Type"))
      PrintFatalError("unsignedflag's argument should be a type");
    if (const auto *ST = dyn_cast<ScalarType>(getType(TypeRec, Param))) {
      return std::make_shared<IntLiteralResult>(
        getScalarType("u32"), ST->kind() == ScalarTypeKind::UnsignedInt);
    } else {
      PrintFatalError("unsignedflag's argument should be a scalar type");
    }
  } else {
    std::vector<Result::Ptr> Args;
    for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i)
Loading