Unverified Commit 6ef67130 authored by Boyao Wang, committed by GitHub

[RISCV][CodeGen] Add initial vzip codegen support (#194548)

Add initial support for the vzip instruction, which is included in the
Zvzip extension. It is used to lower VECTOR_SHUFFLE with an interleave
pattern and to lower VECTOR_INTERLEAVE.
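
For reference, the interleave these lowerings target has simple element-wise
semantics: even result lanes come from the first source and odd lanes from the
second. A minimal standalone C++ model (illustrative only; zipInterleave is a
hypothetical name, not part of this patch):

#include <array>
#include <cstddef>
#include <cstdint>

// Zip two N-element vectors into one 2N-element vector, mirroring what the
// new vzip.vv lowering produces.
template <std::size_t N>
std::array<uint32_t, 2 * N> zipInterleave(const std::array<uint32_t, N> &A,
                                          const std::array<uint32_t, N> &B) {
  std::array<uint32_t, 2 * N> R{};
  for (std::size_t I = 0; I != N; ++I) {
    R[2 * I] = A[I];     // even lane from the first source
    R[2 * I + 1] = B[I]; // odd lane from the second source
  }
  return R;
}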
parent 329853dd
+55 −2
@@ -5139,6 +5139,15 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
  return SDValue();
}
static bool isLegalVTForZvzipOperand(MVT VT, const RISCVSubtarget &Subtarget,
                                     const TargetLowering &TLI) {
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(TLI, VT, Subtarget);
  // Determine LMUL of the container vector.
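  // A zip result is twice as wide as its sources (see lowerZvzipVZIP), so an
  // LMUL_8 source would require an illegal doubled-LMUL result.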
  return RISCVTargetLowering::getLMUL(ContainerVT) != RISCVVType::LMUL_8;
}
/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements. \p EvenSrc will contain the element that should be in the first
@@ -5649,6 +5658,32 @@ static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
  return Res;
}
static SDValue lowerZvzipVZIP(SDValue Op0, SDValue Op1, const SDLoc &DL,
                              SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
  assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
  MVT VT = Op0.getSimpleValueType();
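  // Operate on integer element types: bitcast the (possibly floating-point)
  // sources here and bitcast the result back at the end.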
  MVT IntVT = VT.changeVectorElementTypeToInteger();
  Op0 = DAG.getBitcast(IntVT, Op0);
  Op1 = DAG.getBitcast(IntVT, Op1);
  MVT ContainerVT = IntVT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
  }
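  // The zipped result has twice as many elements as each source operand.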
  MVT ResVT = ContainerVT.getDoubleNumVectorElementsVT();
  auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(ResVT);
  SDValue Res =
      DAG.getNode(RISCVISD::VZIP_VL, DL, ResVT, Op0, Op1, Passthru, Mask, VL);
  if (IntVT.isFixedLengthVector())
    Res = convertFromScalableVector(IntVT.getDoubleNumVectorElementsVT(), Res,
                                    DAG, Subtarget);
  Res = DAG.getBitcast(VT.getDoubleNumVectorElementsVT(), Res);
  return Res;
}
// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
//   a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -6423,8 +6458,11 @@ SDValue RISCVTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
      OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
    }
-    // Prefer vzip2a if available.
-    // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+    // Prefer vzip2a or vzip if available.
+    // TODO: Extend to matching ri.vzip2b or vzip if EvenSrc and OddSrc allow.
+    if (Subtarget.hasStdExtZvzip() &&
+        isLegalVTForZvzipOperand(VT, Subtarget, *this))
+      return lowerZvzipVZIP(EvenV, OddV, DL, DAG, Subtarget);
    if (Subtarget.hasVendorXRivosVizip()) {
      EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
      OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
@@ -12970,6 +13008,21 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
    return DAG.getMergeValues(Loads, DL);
  }
  if (Subtarget.hasStdExtZvzip() && !Op.getOperand(0).isUndef() &&
      !Op.getOperand(1).isUndef()) {
    MVT VT = Op->getSimpleValueType(0);
    if (isLegalVTForZvzipOperand(VT, Subtarget, *this)) {
      // Freeze the sources so we can increase their use count.
      SDValue V1 = DAG.getFreeze(Op->getOperand(0));
      SDValue V2 = DAG.getFreeze(Op->getOperand(1));
      SDValue Interleaved = lowerZvzipVZIP(V1, V2, DL, DAG, Subtarget);
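      // Split the double-length zip result back into the two result halves.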
      SDValue Lo = DAG.getExtractSubvector(DL, VT, Interleaved, 0);
      SDValue Hi = DAG.getExtractSubvector(DL, VT, Interleaved,
                                           VT.getVectorMinNumElements());
      return DAG.getMergeValues({Lo, Hi}, DL);
    }
  }
  // Use ri.vzip2{a,b} if available
  // TODO: Figure out the best lowering for the spread variants
  if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
+25 −0
@@ -139,6 +139,15 @@ defm : VPatBinaryW_VV<"int_riscv_vzip", "PseudoVZIP", AllZvzipVectors,
                      ExtraPreds = [HasStdExtZvzip],
                      requireMinimal = true>;

// (vd (op vs2, vs1, passthru, mask, vl))
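// The result type (0) is the widened vector; SDTCisSubVecOfVec<1, 0>
// constrains each source (1, 2) to be a narrower sub-vector of it.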
def SDT_RISCVZip_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisVec<1>,
                                           SDTCisSubVecOfVec<1, 0>,
                                           SDTCisSameAs<1, 2>,
                                           SDTCisSameAs<0, 3>,
                                           SDTCVecEltisVT<4, i1>,
                                           SDTCisSameNumEltsAs<1, 4>,
                                           SDTCisVT<5, XLenVT>]>;

def SDT_RISCVVecBinOp_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisVec<0>,
@@ -150,8 +159,24 @@ def SDT_RISCVVecBinOp_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
let HasPassthruOp = true, HasMaskOp = true in {
def vpaire_vl  : RVSDNode<"VPAIRE_VL", SDT_RISCVVecBinOp_VL>;
def vpairo_vl  : RVSDNode<"VPAIRO_VL", SDT_RISCVVecBinOp_VL>;
def vzip_vl    : RVSDNode<"VZIP_VL", SDT_RISCVZip_VL>;
} // HasPassthruOp = true, HasMaskOp = true

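// Select vzip_vl to PseudoVZIP: the result uses the widened type (wti) while
// both sources use the narrow element type (vti).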
multiclass VPatVZIP<SDPatternOperator vop, string instruction_name> {
  foreach VtiToWti = AllZvzipVectors in {
    defvar vti = VtiToWti.Vti;
    defvar wti = VtiToWti.Wti;
    let Predicates = !listconcat([HasStdExtZvzip],
                                 GetVTypeMinimalPredicates<vti>.Predicates) in {
      def : VPatBinaryVL_V<vop, instruction_name, "VV",
                           wti.Vector, vti.Vector, vti.Vector, vti.Mask,
                           vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
                           vti.RegClass>;
    }
  }
}

defm : VPatVZIP<vzip_vl, "PseudoVZIP">;
defm : VPatBinaryVL_VV<vpaire_vl, "PseudoVPAIRE", AllVectors,
                       ExtraPreds = [HasStdExtZvzip],
                       requireMinimal = true>;
+23 −63
@@ -30,9 +30,7 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
; ZVZIP-LABEL: interleave_v2i8:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v8, v9
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v9
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -59,9 +57,7 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
; ZVZIP-LABEL: interleave_v2i16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v8, v9
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v9
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -89,9 +85,7 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
; ZVZIP-LABEL: interleave_v2i32:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v9, v8
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v8
+; ZVZIP-NEXT:    vzip.vv v10, v9, v8
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
@@ -179,9 +173,7 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
; ZVZIP-LABEL: interleave_v4i8:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v9, v8
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v8
+; ZVZIP-NEXT:    vzip.vv v10, v9, v8
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
@@ -218,9 +210,7 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
; ZVZIP-LABEL: interleave_v4i16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v8, v9
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v9
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 4, i32 poison, i32 5, i32 2, i32 poison, i32 3, i32 7>
@@ -260,9 +250,7 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v11, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v10
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %a
@@ -305,9 +293,7 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) {
; ZVZIP-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v9, 2
; ZVZIP-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v10
+; ZVZIP-NEXT:    vzip.vv v9, v8, v10
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 6, i32 1, i32 7>
@@ -360,10 +346,10 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vslideup.vi v10, v9, 1, v0.t
; ZVZIP-NEXT:    vmv.v.i v0, 10
-; ZVZIP-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; ZVZIP-NEXT:    vzext.vf2 v9, v8
+; ZVZIP-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; ZVZIP-NEXT:    vzip.vv v11, v8, v9
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; ZVZIP-NEXT:    vmerge.vvm v8, v9, v10, v0
+; ZVZIP-NEXT:    vmerge.vvm v8, v11, v10, v0
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
  ret <4 x i32> %a
@@ -398,9 +384,7 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
; ZVZIP-LABEL: interleave_v8i8:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v8, v9
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v9
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -441,9 +425,7 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
; ZVZIP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v10, v11
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v11
+; ZVZIP-NEXT:    vzip.vv v8, v10, v11
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <16 x i16> %a
@@ -482,9 +464,7 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
; ZVZIP-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVZIP-NEXT:    vmv2r.v v12, v10
; ZVZIP-NEXT:    vmv2r.v v14, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v14, v12
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v12
+; ZVZIP-NEXT:    vzip.vv v8, v14, v12
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i32> %a
@@ -524,9 +504,7 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
; ZVZIP-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v11, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v10
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <32 x i8> %a
@@ -566,9 +544,7 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
; ZVZIP-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVZIP-NEXT:    vmv2r.v v12, v10
; ZVZIP-NEXT:    vmv2r.v v14, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v14, v12
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v12
+; ZVZIP-NEXT:    vzip.vv v8, v14, v12
; ZVZIP-NEXT:    ret
  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <32 x i16> %a
@@ -654,9 +630,7 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
; ZVZIP-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; ZVZIP-NEXT:    vmv2r.v v12, v10
; ZVZIP-NEXT:    vmv2r.v v14, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v14, v12
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v12
+; ZVZIP-NEXT:    vzip.vv v8, v14, v12
; ZVZIP-NEXT:    ret
  %a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  ret <64 x i8> %a
@@ -922,9 +896,7 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
; ZVZIP-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 2
; ZVZIP-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v10
+; ZVZIP-NEXT:    vzip.vv v9, v8, v10
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -1016,9 +988,7 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
; ZVZIP-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 2
; ZVZIP-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v10
+; ZVZIP-NEXT:    vzip.vv v9, v8, v10
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -1061,9 +1031,7 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
; ZVZIP-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 2
; ZVZIP-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v10
+; ZVZIP-NEXT:    vzip.vv v9, v8, v10
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -1170,9 +1138,7 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) {
; ZVZIP-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 4
; ZVZIP-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v10
+; ZVZIP-NEXT:    vzip.vv v9, v8, v10
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 poison, i32 6, i32 3, i32 7>
@@ -1215,9 +1181,7 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) {
; ZVZIP-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 4
; ZVZIP-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v9, v10, v8
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v9, a0, v8
+; ZVZIP-NEXT:    vzip.vv v9, v10, v8
; ZVZIP-NEXT:    vmv1r.v v8, v9
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 4, i32 poison, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
@@ -1260,9 +1224,7 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v12, v8, 4
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v10, v8, v12
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v10, a0, v12
+; ZVZIP-NEXT:    vzip.vv v10, v8, v12
; ZVZIP-NEXT:    vmv2r.v v8, v10
; ZVZIP-NEXT:    ret
  %a = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -1332,9 +1294,7 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) {
; ZVZIP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwaddu.vv v8, v11, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v10
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
entry:
  %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison, i32 4, i32 poison, i32 5, i32 poison, i32 6, i32 poison, i32 7, i32 poison>
+4 −8
@@ -4120,9 +4120,7 @@ define { <8 x float>, <8 x float> } @interleave_deinterleave2(<8 x float> %a, <8
; ZVZIP-LABEL: interleave_deinterleave2:
; ZVZIP:       # %bb.0: # %entry
; ZVZIP-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; ZVZIP-NEXT:    vwaddu.vv v12, v8, v10
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v12, a0, v10
+; ZVZIP-NEXT:    vzip.vv v12, v8, v10
; ZVZIP-NEXT:    li a0, 32
; ZVZIP-NEXT:    vnsrl.wx v10, v12, a0
; ZVZIP-NEXT:    vnsrl.wi v8, v12, 0
@@ -4159,11 +4157,9 @@ define <16 x float> @deinterleave_interleave2(<16 x float> %arg) {
; ZVZIP:       # %bb.0: # %entry
; ZVZIP-NEXT:    li a0, 32
; ZVZIP-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; ZVZIP-NEXT:    vnsrl.wi v12, v8, 0
-; ZVZIP-NEXT:    vnsrl.wx v14, v8, a0
-; ZVZIP-NEXT:    vwaddu.vv v8, v12, v14
-; ZVZIP-NEXT:    li a0, -1
-; ZVZIP-NEXT:    vwmaccu.vx v8, a0, v14
+; ZVZIP-NEXT:    vnsrl.wx v12, v8, a0
+; ZVZIP-NEXT:    vnsrl.wi v14, v8, 0
+; ZVZIP-NEXT:    vzip.vv v8, v14, v12
; ZVZIP-NEXT:    ret
entry:
  %0 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %arg)
+11 −25
@@ -72,8 +72,7 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; ZVZIP-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; ZVZIP-NEXT:    vslidedown.vi v10, v8, 16
; ZVZIP-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; ZVZIP-NEXT:    vwsll.vi v12, v10, 8
-; ZVZIP-NEXT:    vwaddu.wv v12, v12, v8
+; ZVZIP-NEXT:    vzip.vv v12, v8, v10
; ZVZIP-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; ZVZIP-NEXT:    vmsne.vi v0, v12, 0
; ZVZIP-NEXT:    ret
@@ -114,8 +113,7 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; ZVZIP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwsll.vi v8, v10, 16
-; ZVZIP-NEXT:    vwaddu.wv v8, v8, v11
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
	   %res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
	   ret <16 x i16> %res
@@ -155,9 +153,7 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    li a0, 32
-; ZVZIP-NEXT:    vwsll.vx v8, v10, a0
-; ZVZIP-NEXT:    vwaddu.wv v8, v8, v11
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
	   %res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
	   ret <8 x i32> %res
@@ -1086,8 +1082,7 @@ define <4 x half> @vector_interleave_v4f16_v2f16(<2 x half> %a, <2 x half> %b) {
; ZVZIP-LABEL: vector_interleave_v4f16_v2f16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; ZVZIP-NEXT:    vwsll.vi v10, v9, 16
-; ZVZIP-NEXT:    vwaddu.wv v10, v10, v8
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
	   %res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
@@ -1122,8 +1117,7 @@ define <4 x bfloat> @vector_interleave_v4bf16_v2bf16(<2 x bfloat> %a, <2 x bfloa
; ZVZIP-LABEL: vector_interleave_v4bf16_v2bf16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; ZVZIP-NEXT:    vwsll.vi v10, v9, 16
-; ZVZIP-NEXT:    vwaddu.wv v10, v10, v8
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
	   %res = call <4 x bfloat> @llvm.vector.interleave2.v4bf16(<2 x bfloat> %a, <2 x bfloat> %b)
@@ -1158,8 +1152,7 @@ define <8 x half> @vector_interleave_v8f16_v4f16(<4 x half> %a, <4 x half> %b) {
; ZVZIP-LABEL: vector_interleave_v8f16_v4f16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVZIP-NEXT:    vwsll.vi v10, v9, 16
-; ZVZIP-NEXT:    vwaddu.wv v10, v10, v8
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
	   %res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
@@ -1194,8 +1187,7 @@ define <8 x bfloat> @vector_interleave_v8bf16_v4bf16(<4 x bfloat> %a, <4 x bfloa
; ZVZIP-LABEL: vector_interleave_v8bf16_v4bf16:
; ZVZIP:       # %bb.0:
; ZVZIP-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVZIP-NEXT:    vwsll.vi v10, v9, 16
-; ZVZIP-NEXT:    vwaddu.wv v10, v10, v8
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
	   %res = call <8 x bfloat> @llvm.vector.interleave2.v8bf16(<4 x bfloat> %a, <4 x bfloat> %b)
@@ -1230,10 +1222,8 @@ define <4 x float> @vector_interleave_v4f32_v2f32(<2 x float> %a, <2 x float> %b
;
; ZVZIP-LABEL: vector_interleave_v4f32_v2f32:
; ZVZIP:       # %bb.0:
-; ZVZIP-NEXT:    li a0, 32
; ZVZIP-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; ZVZIP-NEXT:    vwsll.vx v10, v9, a0
-; ZVZIP-NEXT:    vwaddu.wv v10, v10, v8
+; ZVZIP-NEXT:    vzip.vv v10, v8, v9
; ZVZIP-NEXT:    vmv1r.v v8, v10
; ZVZIP-NEXT:    ret
	   %res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
@@ -1273,8 +1263,7 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
; ZVZIP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwsll.vi v8, v10, 16
-; ZVZIP-NEXT:    vwaddu.wv v8, v8, v11
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
	   %res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
	   ret <16 x half> %res
@@ -1313,8 +1302,7 @@ define <16 x bfloat> @vector_interleave_v16bf16_v8bf16(<8 x bfloat> %a, <8 x bfl
; ZVZIP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    vwsll.vi v8, v10, 16
-; ZVZIP-NEXT:    vwaddu.wv v8, v8, v11
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
	   %res = call <16 x bfloat> @llvm.vector.interleave2.v16bf16(<8 x bfloat> %a, <8 x bfloat> %b)
	   ret <16 x bfloat> %res
@@ -1354,9 +1342,7 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
; ZVZIP-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVZIP-NEXT:    vmv1r.v v10, v9
; ZVZIP-NEXT:    vmv1r.v v11, v8
-; ZVZIP-NEXT:    li a0, 32
-; ZVZIP-NEXT:    vwsll.vx v8, v10, a0
-; ZVZIP-NEXT:    vwaddu.wv v8, v8, v11
+; ZVZIP-NEXT:    vzip.vv v8, v11, v10
; ZVZIP-NEXT:    ret
	   %res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
	   ret <8 x float> %res