Commit 7cad5a9e authored by Caroline Concatto

[Clang][SVE2.1] Add svpext builtins

As described in: https://github.com/ARM-software/acle/pull/257

Reviewed By: hassnaa-arm

Differential Revision: https://reviews.llvm.org/D151081
parent be57381a
+3 −1
@@ -1862,11 +1862,13 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv
let TargetGuard = "sve2p1" in {
def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>;
def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;

def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
}

let TargetGuard = "sve2p1" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;

}
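
A usage sketch of the intrinsics these two records generate, as exercised by the new test further down. The _c8/_c16/_c32/_c64 suffixes come from the QcQsQiQl type spec, and the ImmCheck0_3/ImmCheck0_1 names suggest the immediate ranges; treat this as a sketch only, the authoritative signatures are in the ACLE pull request linked in the commit message.

#include <arm_sve.h>

// Single result: "P}i" with ImmCheck0_3, lane index 0..3.
svbool_t use_pext(svcount_t c) {
  return svpext_lane_c32(c, 2);
}

// Pair of results: "2.P}i" with ImmCheck0_1, lane-pair index 0..1.
svboolx2_t use_pext_x2(svcount_t c) {
  return svpext_lane_c8_x2(c, 1);
}
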
+2 −1
@@ -61,7 +61,8 @@
// -------------------
// prototype: return (arg, arg, ...)
//
// 2,3,4: array of default vectors
// 2,3,4: array of vectors
// .: indicator for multi-vector modifier that will follow (e.g. 2.x)
// v: void
// x: vector of signed integers
// u: vector of unsigned integers
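
Reading the new "." indicator against the SVPEXT_X2 prototype "2.P}i" above, as a sketch; the meanings assumed for the modifiers not shown in this hunk are P = predicate vector (svbool_t), } = svcount_t, i = constant immediate.

// "2.P" -> return an array of 2 predicate vectors, i.e. svboolx2_t
// "}"   -> first argument is an svcount_t
// "i"   -> second argument is a constant immediate (assumed uint64_t here)
svboolx2_t svpext_lane_c8_x2(svcount_t pnn, uint64_t imm);

// Without the ".", a plain "2" denotes an array of two default vectors, whose
// element type comes from the type-spec string rather than from the modifier
// that follows.
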
+36 −1
@@ -9853,6 +9853,41 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
  return Call;
}
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
  // Multi-vector results are returned as a struct of scalable vectors; combine
  // them into a single (wide) result vector.
  auto *StructTy = dyn_cast<StructType>(Call->getType());
  if (!StructTy)
    return Call;
  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
  if (!VTy)
    return Call;
  unsigned N = StructTy->getNumElements();
  // We may need to emit a cast to an svbool_t
  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
  unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
  ScalableVectorType *WideVTy =
      ScalableVectorType::get(VTy->getElementType(), MinElts * N);
  Value *Ret = llvm::PoisonValue::get(WideVTy);
  for (unsigned I = 0; I < N; ++I) {
    Value *SRet = Builder.CreateExtractValue(Call, I);
    assert(SRet->getType() == VTy && "Unexpected type for result value");
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
    if (IsPredTy)
      SRet = EmitSVEPredicateCast(
          SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
    Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
  }
  Call = Ret;
  return Call;
}
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  // Find out if any arguments are required to be integer constant expressions.
@@ -9966,7 +10001,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
      if (PredTy->getScalarType()->isIntegerTy(1))
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
    return Call;
    return FormSVEBuiltinResult(Call);
  }
  switch (BuiltinID) {
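
With FormSVEBuiltinResult now wired into EmitAArch64SVEBuiltinExpr, builtins whose IR result is a struct of scalable vectors, such as the x2 form added above, come back as one wide vector; the autogenerated checks in the new test below verify exactly that flattening. A minimal C view of the case it handles (the same function appears in the test file):

#include <arm_sve.h>

svboolx2_t two_lanes(svcount_t c) {
  // The pext.x2 intrinsic returns { <vscale x 16 x i1>, <vscale x 16 x i1> };
  // FormSVEBuiltinResult extracts both fields and inserts them into a single
  // <vscale x 32 x i1> at offsets 0 and 16, the in-IR representation of
  // svboolx2_t.
  return svpext_lane_c8_x2(c, 0);
}
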
+5 −0
@@ -4292,6 +4292,11 @@ public:
  llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
                                  SmallVectorImpl<llvm::Value *> &Ops,
                                  unsigned IntID);
  /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
  /// vector. It extracts each scalable vector from the struct and inserts it
  /// into the wider vector. This avoids the error LLVM raises when allocating
  /// space for a struct of scalable vectors if a function returns one.
  llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
  llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

  llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
+152 −0
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK

#include <arm_sve.h>

// CHECK-LABEL: @test_svpext_lane_c8_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svpext_lane_c8_0(svcount_t c) {
  return svpext_lane_c8(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c8_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
//
svbool_t test_svpext_lane_c8_3(svcount_t c) {
  return svpext_lane_c8(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c16_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c16_0(svcount_t c) {
  return svpext_lane_c16(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c16_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c16_3(svcount_t c) {
  return svpext_lane_c16(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c32_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c32_0(svcount_t c) {
  return svpext_lane_c32(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c32_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c32_3(svcount_t c) {
  return svpext_lane_c32(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c64_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c64_0(svcount_t c) {
  return svpext_lane_c64(c, 0);
}

// CHECK-LABEL: @test_svpext_lane_c64_3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_3u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 3)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svpext_lane_c64_3(svcount_t c) {
  return svpext_lane_c64(c, 3);
}

// CHECK-LABEL: @test_svpext_lane_c8_x2_0(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
//
svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) {
  return svpext_lane_c8_x2(c, 0);
}