Commit 31004809 authored by Simon Tatham's avatar Simon Tatham
Browse files

[ARM,MVE] Intrinsics for partial-overwrite imm shifts.

This batch of intrinsics covers two sets of immediate shift
instructions, which have in common that they only overwrite part of
their output register and so they need an extra input giving its
previous value.

The VSLI and VSRI instructions shift each lane of the input vector
left or right just as if they were normal immediate VSHL/VSHR, but
then they only overwrite the output bits that correspond to actual
shifted bits of the input. So VSLI will leave the low n bits of each
output lane unchanged, and VSRI the same with the top n bits.

The V[Q][R]SHR[U]N family are all narrowing shifts: they take an input
vector of 2n-bit integers, shift each lane right by a constant, and
then narrow the shifted result to only n bits. So they only
overwrite half of the n-bit lanes in the output register, and the B/T
suffix indicates whether it's the bottom or top half of each 2n-bit
lane.

I've implemented the whole of the latter family using a single IR
intrinsic `vshrn`, which takes a lot of i32 parameters indicating
which instruction it expands to (by specifying signedness of the input
and output types, whether it saturates and/or rounds, etc).

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72328
parent ba129c7d
Loading
Loading
Loading
Loading
+37 −0
Original line number Diff line number Diff line
@@ -651,6 +651,43 @@ multiclass vshll_imm<int top> {
defm vshllbq : vshll_imm<0>;
defm vshlltq : vshll_imm<1>;

// DyadicImmShift defines a pair of immediate-shift intrinsics that take two
// vector operands plus a shift count:
//
//   <NAME>q_n    - unpredicated: (outtype $a, Vector $b, imm $sh)
//   <NAME>q_m_n  - predicated:   the same, plus a trailing Predicate $pred
//
// Both return outtype. They expand to the IR intrinsic called intname (which
// defaults to the defm name), or intname # "_predicated" for the predicated
// form. Any constants in extraargs are appended to the IR call after $sh; in
// the predicated form $pred comes after those.
multiclass DyadicImmShift<Type outtype, Immediate imm, string intname = NAME,
                          dag extraargs = (?)> {
  // The single-element foreach only serves to bind a name: the IR intrinsic
  // is parameterized on [Vector] alone when outtype is Vector itself, and on
  // [outtype, Vector] when the two types differ.
  foreach intparams = [!if(!eq(!cast<string>(outtype), !cast<string>(Vector)),
                           [Vector], [outtype, Vector])] in {
    def q_n: Intrinsic<
        outtype, (args outtype:$a, Vector:$b, imm:$sh),
        !con((IRInt<intname, intparams> $a, $b, $sh), extraargs)>;

    def q_m_n: Intrinsic<
        outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred),
        !con((IRInt<intname # "_predicated", intparams # [Predicate]>
                 $a, $b, $sh), extraargs, (? $pred))>;
  }
}

// VSHRN defines the 'b' and 't' variants of one narrowing-shift intrinsic.
// Both expand to the "vshrn" IR intrinsic; the caller's extraargs are passed
// through with one more constant appended at the end: 0 for the 'b' variant
// and 1 for the 't' variant.
multiclass VSHRN<Type outtype, Immediate imm, dag extraargs> {
  defm b: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? 0))>;
  defm t: DyadicImmShift<outtype, imm, "vshrn", !con(extraargs, (? 1))>;
}

// Narrowing right shifts for both signed and unsigned 16- and 32-bit inputs,
// returning HalfVector. The four constants passed to VSHRN are forwarded as
// extra operands of the IR intrinsic; the q-prefixed forms set the first to 1
// and the r-prefixed forms set the second to 1.
// NOTE(review): presumably these four are saturate, round, unsigned-out and
// unsigned-in, in that order - confirm against the int_arm_mve_vshrn
// declaration.
let params = [s16, s32, u16, u32], pnt = PNT_NType in {
  // The single-element foreach just binds U so it can be passed twice.
  // NOTE(review): presumably U is 1 when Scalar is unsigned - confirm against
  // the definition of unsignedflag.
  foreach U = [(unsignedflag Scalar)] in {
    defm vshrn   : VSHRN<HalfVector, imm_1toHalfN, (? 0,0,U,U)>;
    defm vqshrn  : VSHRN<HalfVector, imm_1toHalfN, (? 1,0,U,U)>;
    defm vrshrn  : VSHRN<HalfVector, imm_1toHalfN, (? 0,1,U,U)>;
    defm vqrshrn : VSHRN<HalfVector, imm_1toHalfN, (? 1,1,U,U)>;
  }
}
// Saturating narrowing right shifts from a signed input to an unsigned
// half-width output (UHalfVector), so only signed params are listed.
//
// The four constants are the extra "vshrn" IR operands, in the same order as
// used by vshrn/vqshrn/vrshrn/vqrshrn: saturate, round, unsigned-out,
// unsigned-in. VSHRN appends the final top-half flag itself. The rounding
// form vqrshrun must therefore set the second (round) flag to 1; otherwise
// it would be indistinguishable from vqshrun.
let params = [s16, s32], pnt = PNT_NType in {
  defm vqshrun  : VSHRN<UHalfVector, imm_1toHalfN, (? 1,0,1,0)>;
  defm vqrshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,1,1,0)>;
}
// Shift-and-insert intrinsics: same input and output vector type, expanding
// to IR intrinsics named after the defm (vsli / vsri).
//
// The immediate ranges differ between the two directions. A left
// shift-insert accepts a count of 0 to N-1 (imm_0toNm1), whereas a right
// shift-insert accepts 1 to N (imm_1toN), where N is the lane width in bits.
let params = T.Int, pnt = PNT_NType in {
  defm vsli : DyadicImmShift<Vector, imm_0toNm1>;
  defm vsri : DyadicImmShift<Vector, imm_1toN>;
}

// Base class for the scalar shift intrinsics.
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
  Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
+19 −5
Original line number Diff line number Diff line
@@ -190,7 +190,10 @@ def CTO_Pred: ComplexTypeOp;
class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
def CTO_CopyKind: ComplexTypeOp;
def CTO_DoubleSize: ComplexTypeOp;
// CTO_ScaleSize<num, denom> is a type operator carrying a size ratio. It is
// applied to a scalar type; MveEmitter resolves it to the scalar type of the
// same kind whose size in bits is (operand size * num / denom), and reports a
// fatal error if no such type exists.
class CTO_ScaleSize<int num_, int denom_>: ComplexTypeOp {
  int num = num_;
  int denom = denom_;
}

// -----------------------------------------------------------------------------
// Instances of Type intended to be used directly in the specification of an
@@ -268,7 +271,8 @@ class CopyKind<Type s, Type k>: ComplexType<(CTO_CopyKind s, k)>;
// DoubleSize<k> expects k to be a scalar type. It returns a scalar type
// whose kind (signed, unsigned or float) matches that of k, and whose size
// is double that of k, if possible.
class DoubleSize<Type k>: ComplexType<(CTO_DoubleSize k)>;
class DoubleSize<Type k> : ComplexType<(CTO_ScaleSize<2, 1> k)>;
class HalfSize<Type k>   : ComplexType<(CTO_ScaleSize<1, 2> k)>;

// Unsigned<t> expects t to be a scalar type, and expands to the unsigned
// integer scalar of the same size. So it returns u16 if you give it s16 or
@@ -280,9 +284,12 @@ class Unsigned<Type t>: ComplexType<(CTO_CopyKind t, u32)>;
def UScalar: Unsigned<Scalar>;
def UVector: VecOf<UScalar>;

// DblVector expands to a vector of scalars of size twice the size of
// Scalar.
// DblVector expands to a vector of scalars of size twice the size of Scalar.
// HalfVector, similarly, expands to a vector of half-sized scalars. And
// UHalfVector is a vector of half-sized _unsigned integers_.
def DblVector: VecOf<DoubleSize<Scalar>>;
def HalfVector: VecOf<HalfSize<Scalar>>;
def UHalfVector: VecOf<Unsigned<HalfSize<Scalar>>>;

// Expands to the 32-bit integer of the same signedness as Scalar.
def Scalar32: CopyKind<u32, Scalar>;
@@ -305,7 +312,10 @@ class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
}
def IB_UEltValue : ImmediateBounds;
def IB_LaneIndex : ImmediateBounds;
class IB_EltBit<int base_> : ImmediateBounds { int base = base_; }
// IB_EltBit<base, type> bounds an immediate to the inclusive range
// base .. base + (size of 'type' in bits) - 1. 'type' defaults to Scalar, so
// existing single-argument uses keep deriving the range from the intrinsic's
// own scalar type.
class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
  int base = base_;
  Type type = type_;
}

// -----------------------------------------------------------------------------
// End-user definitions for immediate arguments.
@@ -327,8 +337,12 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
//
// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
// inclusive.
//
// imm_1toHalfN is like imm_1toN, but applied to a half-width type.
// (So if Scalar is s16, for example, it'll give you the range 1 to 8.)
def imm_1toN : Immediate<sint, IB_EltBit<1>>;
def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
def imm_1toHalfN : Immediate<sint, IB_EltBit<1, HalfSize<Scalar>>>;

// imm_lane has to be the index of a vector lane in the main vector type, i.e
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
+1565 −0

File added.

Preview size limit exceeded, changes collapsed.

+7 −4
Original line number Diff line number Diff line
@@ -1099,14 +1099,16 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
    PrintFatalError("Cannot find a type to satisfy CopyKind");
  }

  if (Op->getName() == "CTO_DoubleSize") {
  if (Op->isSubClassOf("CTO_ScaleSize")) {
    const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(0), Param));
    int Num = Op->getValueAsInt("num"), Denom = Op->getValueAsInt("denom");
    unsigned DesiredSize = STKind->sizeInBits() * Num / Denom;
    for (const auto &kv : ScalarTypes) {
      const ScalarType *RT = kv.second.get();
      if (RT->kind() == STKind->kind() && RT->sizeInBits() == 2*STKind->sizeInBits())
      if (RT->kind() == STKind->kind() && RT->sizeInBits() == DesiredSize)
        return RT;
    }
    PrintFatalError("Cannot find a type to satisfy DoubleSize");
    PrintFatalError("Cannot find a type to satisfy ScaleSize");
  }

  PrintFatalError("Bad operator in type dag expression");
@@ -1338,7 +1340,8 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
        } else if (Bounds->isSubClassOf("IB_EltBit")) {
          IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
          IA.i1 = Bounds->getValueAsInt("base");
          IA.i2 = IA.i1 + Param->sizeInBits() - 1;
          const Type *T = ME.getType(Bounds->getValueAsDef("type"), Param);
          IA.i2 = IA.i1 + T->sizeInBits() - 1;
        } else {
          PrintFatalError("unrecognised ImmediateBounds subclass");
        }
+11 −0
Original line number Diff line number Diff line
@@ -944,6 +944,17 @@ defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
   [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
                       llvm_i32_ty /*top-half*/]>;

// Shift-and-insert intrinsics. The result and both vector operands share one
// overloaded vector type; the trailing i32 operand is the shift count.
// NOTE(review): the clang-side definitions pass the operands as ($a, $b, $sh),
// where $a is presumably the previous value of the output register - confirm.
defm int_arm_mve_vsli: MVEPredicated<
   [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
defm int_arm_mve_vsri: MVEPredicated<
   [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;

// Single intrinsic covering the narrowing right-shift family. The first
// vector operand has the same type as the result, and the second vector
// operand is separately overloaded. The i32 operands that follow are the
// shift count and a series of flags, annotated inline below.
defm int_arm_mve_vshrn: MVEPredicated<
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
    llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
    llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
    llvm_i32_ty /*top-half*/]>;

// MVE scalar shifts.
class ARM_MVE_qrshift_single<list<LLVMType> value,
                             list<LLVMType> saturate = []> :
Loading