Commit 7cad5a9e authored by Caroline Concatto

[Clang][SVE2.1] Add svpext builtins

As described in: https://github.com/ARM-software/acle/pull/257

Reviewed By: hassnaa-arm

Differential Revision: https://reviews.llvm.org/D151081
parent be57381a
+3 −1
@@ -1862,11 +1862,13 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv
let TargetGuard = "sve2p1" in {
def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>;
def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;

def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
}

let TargetGuard = "sve2p1" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;

}
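
A usage sketch of the intrinsics these two records generate, as exercised by the new test further down. The _c8/_c16/_c32/_c64 suffixes come from the QcQsQiQl type spec, and the ImmCheck0_3/ImmCheck0_1 names suggest the immediate ranges; treat this as a sketch only, the authoritative signatures are in the ACLE pull request linked in the commit message.

#include <arm_sve.h>

// Single result: "P}i" with ImmCheck0_3, lane index 0..3.
svbool_t use_pext(svcount_t c) {
  return svpext_lane_c32(c, 2);
}

// Pair of results: "2.P}i" with ImmCheck0_1, lane-pair index 0..1.
svboolx2_t use_pext_x2(svcount_t c) {
  return svpext_lane_c8_x2(c, 1);
}
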
+2 −1
@@ -61,7 +61,8 @@
// -------------------
// prototype: return (arg, arg, ...)
//
// 2,3,4: array of default vectors
// 2,3,4: array of vectors
// .: indicator for multi-vector modifier that will follow (e.g. 2.x)
// v: void
// x: vector of signed integers
// u: vector of unsigned integers
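
Reading the new "." indicator against the SVPEXT_X2 prototype "2.P}i" above, as a sketch; the meanings assumed for the modifiers not shown in this hunk are P = predicate vector (svbool_t), } = svcount_t, i = constant immediate.

// "2.P" -> return an array of 2 predicate vectors, i.e. svboolx2_t
// "}"   -> first argument is an svcount_t
// "i"   -> second argument is a constant immediate (assumed uint64_t here)
svboolx2_t svpext_lane_c8_x2(svcount_t pnn, uint64_t imm);

// Without the ".", a plain "2" denotes an array of two default vectors, whose
// element type comes from the type-spec string rather than from the modifier
// that follows.
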
+36 −1
@@ -9853,6 +9853,41 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
  return Call;
}
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
  // Multi-vector results are returned as a struct of scalable vectors; combine
  // them into a single (wide) result vector.
  auto *StructTy = dyn_cast<StructType>(Call->getType());
  if (!StructTy)
    return Call;
  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
  if (!VTy)
    return Call;
  unsigned N = StructTy->getNumElements();
  // We may need to emit a cast to an svbool_t
  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
  unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
  ScalableVectorType *WideVTy =
      ScalableVectorType::get(VTy->getElementType(), MinElts * N);
  Value *Ret = llvm::PoisonValue::get(WideVTy);
  for (unsigned I = 0; I < N; ++I) {
    Value *SRet = Builder.CreateExtractValue(Call, I);
    assert(SRet->getType() == VTy && "Unexpected type for result value");
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    if (IsPredTy)
      SRet = EmitSVEPredicateCast(
          SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
    Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
  }
  Call = Ret;
  return Call;
}
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  // Find out if any arguments are required to be integer constant expressions.
@@ -9966,7 +10001,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
      if (PredTy->getScalarType()->isIntegerTy(1))
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
    return Call;
    return FormSVEBuiltinResult(Call);
  }
  switch (BuiltinID) {
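
With FormSVEBuiltinResult now wired into EmitAArch64SVEBuiltinExpr, builtins whose IR result is a struct of scalable vectors, such as the x2 form added above, come back as one wide vector; the autogenerated checks in the new test below verify exactly that flattening. A minimal C view of the case it handles (the same function appears in the test file):

#include <arm_sve.h>

svboolx2_t two_lanes(svcount_t c) {
  // The pext.x2 intrinsic returns { <vscale x 16 x i1>, <vscale x 16 x i1> };
  // FormSVEBuiltinResult extracts both fields and inserts them into a single
  // <vscale x 32 x i1> at offsets 0 and 16, the in-IR representation of
  // svboolx2_t.
  return svpext_lane_c8_x2(c, 0);
}
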
+5 −0
@@ -4292,6 +4292,11 @@ public:
  llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
                                  SmallVectorImpl<llvm::Value *> &Ops,
                                  unsigned IntID);
  /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
  /// vector. It extracts each scalable vector from the struct and inserts it
  /// into the wider vector. This avoids the error LLVM raises when allocating
  /// space for a struct of scalable vectors if a function returns one.
  llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
  llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

  llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
+152 −0
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

#include <arm_sve.h>

// CHECK-LABEL: @test_svpext_lane_c8_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svpext_lane_c8_0(svcount_t c) {
  return svpext_lane_c8(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c8_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svpext_lane_c8_3(svcount_t c) {
  return svpext_lane_c8(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c16_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c16_0(svcount_t c) {
  return svpext_lane_c16(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c16_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c16_3(svcount_t c) {
  return svpext_lane_c16(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c32_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c32_0(svcount_t c) {
  return svpext_lane_c32(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c32_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c32_3(svcount_t c) {
  return svpext_lane_c32(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c64_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c64_0(svcount_t c) {
  return svpext_lane_c64(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c64_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c64_3(svcount_t c) {
  return svpext_lane_c64(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c8_x2_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
//
svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) {
  return svpext_lane_c8_x2(c, 0);
}