[Clang][SVE2.1] Add builtins and intrinsics for SVBFMLSLB/T (200a9252) · Commits · llvm-doe / llvm-project

clang/include/clang/Basic/arm_sve.td

+6 −0

Original line number	Diff line number	Diff line
		@@ -1959,6 +1959,12 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_
		def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
		def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
		def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;

		// Builtins for BFMLSLB / BFMLSLT. In the "dd$$" prototype the return value
		// and the first operand use the default type chosen by the "f" type spec,
		// and each '$' operand is an svbfloat16_t vector. IsOverloadNone is set, so
		// the named LLVM intrinsic is presumably referenced without overloaded type
		// suffixes -- confirm against SveEmitter.
		def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
		def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;

		// Lane-indexed forms: the trailing 'i' in "dd$$i" adds an immediate operand
		// at argument index 3, which is validated with ImmCheck0_7 (presumably the
		// inclusive range 0..7 -- confirm against the ImmCheck definitions).
		def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
		def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
		}

		let TargetGuard = "sve2p1" in {

clang/include/clang/Basic/arm_sve_sme_incl.td

+1 −0

Original line number	Diff line number	Diff line
		@@ -99,6 +99,7 @@
		// O: svfloat16_t
		// M: svfloat32_t
		// N: svfloat64_t
		// $: svbfloat16_t

		// J: Prefetch type (sv_prfop)

clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c

0 → 100644

+85 −0

Original line number	Diff line number	Diff line
		// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
		// REQUIRES: aarch64-registered-target

		// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
		// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
		// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
		// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
		// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

		#include <arm_sve.h>

		#ifdef SVE_OVERLOADED_FORMS
		// SVE_ACLE_FUNC builds the intrinsic name from four token fragments.
		// With SVE_OVERLOADED_FORMS defined, the 2nd and 4th fragments are dropped
		// and only A1 and A3 are pasted together, so each test calls the overloaded
		// spelling (e.g. svbfmlslb instead of svbfmlslb_f32).
		#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
		#else
		// Otherwise all four fragments are pasted, giving the fully suffixed name.
		#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
		#endif

		// BFMLSLB


		// Verifies that svbfmlslb[_f32] lowers to a single call to
		// @llvm.aarch64.sve.bfmlslb in both C and C++ modes. The value captures use
		// `%.*` so that multi-character SSA names such as %zda are matched.
		// CHECK-LABEL: @test_bfmlslb(
		// CHECK-NEXT: entry:
		// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
		// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
		// CPP-CHECK-NEXT: entry:
		// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
		// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
		{
		  return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
		}


		// Verifies that svbfmlslb_lane[_f32] with lane index 7 lowers to a single
		// call to @llvm.aarch64.sve.bfmlslb.lane carrying `i32 7`. The value captures
		// use `%.*` so that multi-character SSA names such as %zda are matched.
		// CHECK-LABEL: @test_bfmlslb_lane(
		// CHECK-NEXT: entry:
		// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
		// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		// CPP-CHECK-LABEL: @_Z17test_bfmlslb_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
		// CPP-CHECK-NEXT: entry:
		// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
		// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		svfloat32_t test_bfmlslb_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
		{
		  return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
		}

		// BFMLSLT


		// Verifies that svbfmlslt[_f32] lowers to a single call to
		// @llvm.aarch64.sve.bfmlslt in both C and C++ modes. The value captures use
		// `%.*` so that multi-character SSA names such as %zda are matched.
		// CHECK-LABEL: @test_bfmlslt(
		// CHECK-NEXT: entry:
		// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
		// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
		// CPP-CHECK-NEXT: entry:
		// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
		// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
		{
		  return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
		}


		// Verifies that svbfmlslt_lane[_f32] with lane index 7 lowers to a single
		// call to @llvm.aarch64.sve.bfmlslt.lane carrying `i32 7`. The value captures
		// use `%.*` so that multi-character SSA names such as %zda are matched.
		// CHECK-LABEL: @test_bfmlslt_lane(
		// CHECK-NEXT: entry:
		// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
		// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		// CPP-CHECK-LABEL: @_Z17test_bfmlslt_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
		// CPP-CHECK-NEXT: entry:
		// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
		// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
		//
		svfloat32_t test_bfmlslt_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
		{
		  return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
		}

clang/utils/TableGen/SveEmitter.cpp

+7 −0

Original line number	Diff line number	Diff line
		@@ -852,6 +852,13 @@ void SVEType::applyModifier(char Mod) {
		NumVectors = 0;
		Signed = false;
		break;
		case '$':
		Predicate = false;
		Svcount = false;
		Float = false;
		BFloat = true;
		ElementBitwidth = 16;
		break;
		case '}':
		Predicate = false;
		Signed = true;

llvm/include/llvm/IR/IntrinsicsAArch64.td

+16 −0

Original line number	Diff line number	Diff line
		@@ -3011,6 +3011,16 @@ let TargetPrefix = "aarch64" in {
		[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
		[IntrNoMem]>;

		// Fixed (non-overloaded) signature:
		//   nxv4f32 (nxv4f32, nxv8bf16, nxv8bf16)
		// marked IntrNoMem, i.e. the intrinsic does not access memory.
		class SME2_BFMLS_Intrinsic
		: DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
		[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
		[IntrNoMem]>;

		// Lane-indexed variant of the signature above: adds a trailing i32 operand
		// at argument index 3 that must be an immediate (ImmArg).
		class SME2_BFMLS_Lane_Intrinsic
		: DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
		[llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
		[IntrNoMem, ImmArg<ArgIndex<3>>]>;

		class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
		: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
		[llvm_i32_ty],
		@@ -3214,6 +3224,12 @@ let TargetPrefix = "aarch64" in {
		def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
		def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;

		// BFMLSLB / BFMLSLT intrinsics, each in a plain and a lane-indexed form.
		// NOTE(review): these are named int_aarch64_sve_* but reuse classes with an
		// SME2_ prefix -- confirm that the class naming is intentional.
		def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
		def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;

		def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
		def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;

		// Multi-vector signed saturating doubling multiply high

		def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;