Commit dac7b23c authored by Simon Tatham
Browse files

[ARM,MVE] Intrinsics for variable shift instructions.

This batch of intrinsics fills in all the shift instructions that take
a variable shift distance in a register, instead of an immediate. Some
of these instructions take a single shift distance in a scalar
register and apply it to all lanes; others take a vector of per-lane
distances.

These instructions are all basically one family, varying in whether
they saturate out-of-range values, and whether they round when bits
are shifted off the bottom. I've implemented them at the IR level
using a much smaller family of IR intrinsics, which take flag
parameters to indicate saturating and/or rounding (along with the
usual one to specify signed/unsigned integers).

An oddity is that all of them are //left// shift instructions – but if
you pass a negative shift count, they'll shift right. So the vector
shift distances are always vectors of //signed// integers, regardless
of whether you're treating the other input vector as signed or
unsigned. Also, even the simplest `vshlq` instruction in this
family (neither saturating nor rounding) has to be implemented as an
IR intrinsic, because the ordinary LLVM IR `shl` operation would
consider an out-of-range shift count to be undefined behavior.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72329
parent 31004809
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -688,6 +688,39 @@ let params = T.Int, pnt = PNT_NType in {
  defm vsri : DyadicImmShift<Vector, imm_1toN>;
}

// VSHL_non_imm defines the intrinsics for one variable-distance shift, i.e.
// one whose shift count comes from a register operand, not an immediate.
//
// Parameters:
//   scalarSuffix       suffix appended to the names of the two forms that
//                      take a single scalar shift count.
//   q, r               integer flags forwarded verbatim to the IR intrinsic,
//                      in the positions it uses for "saturate" and "round".
//   pnt_scalar_unpred  value assigned to `pnt` for the unpredicated scalar
//                      form only; the other forms keep the enclosing `pnt`.
//
// Every form also passes (unsignedflag Scalar) as the final flag operand.
multiclass VSHL_non_imm<string scalarSuffix, int q, int r,
                        PolymorphicNameType pnt_scalar_unpred = PNT_Type> {
  // Unpredicated form taking one s32 shift count for the whole vector.
  let pnt = pnt_scalar_unpred in {
    def scalarSuffix: Intrinsic<
      Vector, (args Vector:$in, s32:$sh),
      (IRInt<"vshl_scalar", [Vector]> $in, $sh,
           q, r, (unsignedflag Scalar))>;
  }
  // Predicated counterpart of the scalar-count form. It takes no separate
  // inactive-lanes argument, only the predicate.
  def "_m" # scalarSuffix: Intrinsic<
    Vector, (args Vector:$in, s32:$sh, Predicate:$pred),
    (IRInt<"vshl_scalar_predicated", [Vector, Predicate]> $in, $sh,
         q, r, (unsignedflag Scalar), $pred)>;

  // Unpredicated form taking a vector of shift counts. The count vector is
  // SVector (signed lanes) whatever the signedness of $in.
  def "": Intrinsic<
    Vector, (args Vector:$in, SVector:$sh),
    (IRInt<"vshl_vector", [Vector, SVector]> $in, $sh,
         q, r, (unsignedflag Scalar))>;
  // Predicated forms of the vector-count shift, generated by IntrinsicMX.
  // NOTE(review): $inactive is not in the args list here, so it is presumably
  // supplied by IntrinsicMX itself -- confirm against its definition.
  defm "": IntrinsicMX<
    Vector, (args Vector:$in, SVector:$sh, Predicate:$pred),
    (IRInt<"vshl_vector_predicated", [Vector, SVector, Predicate]> $in, $sh,
         q, r, (unsignedflag Scalar), $pred, $inactive),
    // The saturating shift intrinsics don't have an x variant, so we
    // set wantXVariant to 1 iff q == 0
    !eq(q, 0)>;
}

// Instantiate VSHL_non_imm once per shift flavour, over the T.Int parameter
// types. The two integer arguments are q (saturate) and r (round):
//   vshlq   q=0 r=0      vqshlq   q=1 r=0
//   vrshlq  q=0 r=1      vqrshlq  q=1 r=1
// The non-rounding pair uses the "_r" scalar suffix with the default
// PNT_Type; the rounding pair uses "_n" and passes PNT_NType explicitly.
let params = T.Int in {
  defm vshlq   : VSHL_non_imm<"_r", 0, 0>;
  defm vqshlq  : VSHL_non_imm<"_r", 1, 0>;
  defm vrshlq  : VSHL_non_imm<"_n", 0, 1, PNT_NType>;
  defm vqrshlq : VSHL_non_imm<"_n", 1, 1, PNT_NType>;
}

// Base class for the scalar shift intrinsics.
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
  Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
+5 −2
Original line number Diff line number Diff line
@@ -276,13 +276,16 @@ class HalfSize<Type k> : ComplexType<(CTO_ScaleSize<1, 2> k)>;

// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
// integer scalar of the same size. So it returns u16 if you give it s16 or
// f16 (or u16 itself).
// f16 (or u16 itself). Similarly, Signed<t> makes the type signed.
// Both classes apply CTO_CopyKind to t; they differ only in the second
// operand (u32 for Unsigned, s32 for Signed), which selects the signedness
// of the resulting integer scalar.
class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;
class Signed<Type t>: ComplexType<(CTO_CopyKind t, s32)>;

// UScalar and UVector expand to the unsigned-integer versions of
// Scalar and Vector.
// Scalar and Vector. SScalar and SVector are signed-integer versions.
// Each vector type is VecOf<> applied to the matching scalar type, so the
// scalar def must come first in each pair.
def UScalar: Unsigned<Scalar>;
def UVector: VecOf<UScalar>;
def SScalar: Signed<Scalar>;
def SVector: VecOf<SScalar>;

// DblVector expands to a vector of scalars of size twice the size of Scalar.
// HalfVector, similarly, expands to a vector of half-sized scalars. And
+1638 −0

File added.

Preview size limit exceeded, changes collapsed.

+7 −0
Original line number Diff line number Diff line
@@ -955,6 +955,13 @@ defm int_arm_mve_vshrn: MVEPredicated<
    llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
    llvm_i32_ty /*top-half*/]>;

// Variable-distance vector shifts. Both intrinsics return a vector of the
// same type as their first operand and end with three i32 flag operands
// (saturate, round, unsigned).
//
// vshl_scalar: a single i32 shift count is given for the whole vector.
// NOTE(review): MVEPredicated and MVEPredicatedM are defined outside this
// hunk. Judging by the instruction-selection patterns added in this commit,
// the _predicated variant from MVEPredicated appends only a predicate
// operand, while the one from MVEPredicatedM appends a predicate plus an
// inactive-lanes vector -- confirm against the multiclass definitions.
defm int_arm_mve_vshl_scalar: MVEPredicated<
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/,
    llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
// vshl_vector: the shift counts are a second vector operand, declared as its
// own llvm_anyvector_ty rather than LLVMMatchType<0>.
defm int_arm_mve_vshl_vector: MVEPredicatedM<
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/,
    llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;

// MVE scalar shifts.
class ARM_MVE_qrshift_single<list<LLVMType> value,
                             list<LLVMType> saturate = []> :
+49 −12
Original line number Diff line number Diff line
@@ -2727,13 +2727,32 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
  let validForTailPredication = 1;
}

// MVE_shift_by_vec_p defines one shift-by-vector instruction for the vector
// type described by VTI, plus the patterns that select it from the
// int_arm_mve_vshl_vector intrinsics.
//
//   iname  instruction mnemonic, forwarded to MVE_shift_by_vec.
//   VTI    vector type info; supplies Suffix, Unsigned, Size, Vec and Pred.
//   q, r   flag bits, passed both to the instruction class and to the
//          patterns, so a pattern only matches an intrinsic call whose
//          saturate/round/unsigned operands equal q, r and VTI.Unsigned.
multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
  // The instruction record itself; the empty suffix means it is named NAME,
  // which is what the !cast<Instruction>(NAME) below refers to.
  def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;

  // Unpredicated intrinsic -> instruction with just the two vector operands.
  def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
                         (i32 q), (i32 r), (i32 VTI.Unsigned))),
            (VTI.Vec (!cast<Instruction>(NAME)
                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;

  // Predicated intrinsic -> the same instruction with ARMVCCThen, the
  // predicate mask and the inactive-lanes vector appended.
  def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
                         (i32 q), (i32 r), (i32 VTI.Unsigned),
                         (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
            (VTI.Vec (!cast<Instruction>(NAME)
                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
                         ARMVCCThen, (VTI.Pred VCCR:$mask),
                         (VTI.Vec MQPR:$inactive)))>;
}

// Instantiates one shift-by-vector instruction per element type
// (s8, s16, s32, u8, u16, u32) for the mnemonic iname. bit_4 and bit_8 are
// forwarded as the last two arguments of each instantiation.
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
  // NOTE(review): this listing is a rendered diff without +/- markers. The
  // six `def` lines below appear to be the removed pre-change lines (the hunk
  // summary for this file reports 12 removals); they and the `defm` lines
  // that follow use the same suffixes and are not meant to coexist.
  def s8  : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
  def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
  def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
  def u8  : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
  def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
  def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
  // Replacement lines: go through MVE_shift_by_vec_p, which takes the
  // suffix, signedness and size from the MVEVectorVTInfo record and also
  // emits selection patterns. bit_4 / bit_8 become its q / r arguments.
  defm s8  : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
  defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
  defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
  defm u8  : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
  defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
  defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
}

// Plain "vshl" by vector: both flag bits (bit_4, bit_8) are 0.
defm MVE_VSHL_by_vec   : mve_shift_by_vec_multi<"vshl",   0b0, 0b0>;
@@ -4542,13 +4561,31 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
  let validForTailPredication = 1;
}

// MVE_VxSHL_qr_p defines one shift-by-scalar-register instruction for the
// vector type described by VTI, plus the patterns that select it from the
// int_arm_mve_vshl_scalar intrinsics.
//
//   iname  instruction mnemonic, forwarded to MVE_VxSHL_qr.
//   VTI    vector type info; supplies Suffix, Unsigned, Size, Vec and Pred.
//   q, r   flag bits, passed both to the instruction class and to the
//          patterns, so a pattern only matches an intrinsic call whose
//          saturate/round/unsigned operands equal q, r and VTI.Unsigned.
multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
  // The instruction record itself; the empty suffix means it is named NAME,
  // which is what the !cast<Instruction>(NAME) below refers to.
  def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;

  // Unpredicated intrinsic -> instruction taking the vector and an i32
  // shift count held in an rGPR register.
  def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
                         (i32 q), (i32 r), (i32 VTI.Unsigned))),
            (VTI.Vec (!cast<Instruction>(NAME)
                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;

  // Predicated intrinsic -> the same instruction with ARMVCCThen and the
  // predicate mask appended. This form has no inactive-lanes operand.
  def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
                         (i32 q), (i32 r), (i32 VTI.Unsigned),
                         (VTI.Pred VCCR:$mask))),
            (VTI.Vec (!cast<Instruction>(NAME)
                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
                         ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
}

// Instantiates one shift-by-scalar-register instruction per element type
// (s8, s16, s32, u8, u16, u32) for the mnemonic iname. bit_7 and bit_17 are
// forwarded as the last two arguments of each instantiation.
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
  // NOTE(review): this listing is a rendered diff without +/- markers. The
  // six `def` lines below appear to be the removed pre-change lines (the hunk
  // summary for this file reports 12 removals); they and the `defm` lines
  // that follow use the same suffixes and are not meant to coexist.
  def s8  : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
  def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
  def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
  def u8  : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
  def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
  def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
  // Replacement lines: go through MVE_VxSHL_qr_p, which takes the suffix,
  // signedness and size from the MVEVectorVTInfo record and also emits
  // selection patterns. bit_7 / bit_17 become its q / r arguments.
  defm s8  : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
  defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
  defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
  defm u8  : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
  defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
  defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
}

// Plain "vshl" by scalar register: both flag bits (bit_7, bit_17) are 0.
defm MVE_VSHL_qr   : MVE_VxSHL_qr_types<"vshl",   0b0, 0b0>;
Loading