Unverified commit 144b2f57 authored by Philip Reames, committed by GitHub

[RISCV] Start vslide1down sequence with a dependency breaking splat (#72691)

If we use only vslide1downs to initialize an otherwise undef
vector, we end up with an implicit_def as the source of the first
vslide1down. This register has to be allocated, and creates false
dependencies with surrounding code.

Instead, start our sequence with a vmv.v.x in the hopes of creating a
dependency breaking idiom. Unfortunately, it's not clear this will
actually work: due to the VL=0 special case for tail agnostic (T.A.),
the hardware has to work pretty hard to recognize that the vmv.v.x
actually has no source dependence. I don't think we can reasonably
expect all hardware to have optimized this case, but I also don't see
any downside in preferring it.
parent fab690d6
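
The effect is easiest to see on a two-element build_vector. A before/after sketch of the lowering, mirroring the test updates below (vector register and VL choices illustrative, not taken verbatim from the patch):

    # Before: the first vslide1down sources v8, which is only an implicit_def;
    # the register still has to be allocated and creates a false dependence.
    vsetivli zero, 2, e32, m1, ta, ma
    vslide1down.vx v8, v8, a0
    vslide1down.vx v8, v8, a1

    # After: a tail-agnostic splat seeds the sequence without reading v8,
    # hopefully acting as a dependency-breaking idiom.
    vsetivli zero, 2, e32, m1, ta, ma
    vmv.v.x v8, a0
    vslide1down.vx v8, v8, a1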
+12 −1
@@ -3798,13 +3798,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
-  SDValue Vec = DAG.getUNDEF(ContainerVT);
+  SDValue Vec;
+  UndefCount = 0;
  for (SDValue V : Op->ops()) {
    if (V.isUndef()) {
      UndefCount++;
      continue;
    }
+    // Start our sequence with a TA splat in the hopes that hardware is able to
+    // recognize there's no dependency on the prior value of our temporary
+    // register.
+    if (!Vec) {
+      Vec = DAG.getSplatVector(VT, DL, V);
+      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      UndefCount = 0;
+      continue;
+    }
    if (UndefCount) {
      const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
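
The tail of this hunk also shows how interior undef elements are handled: the accumulated UndefCount is folded into a single vslidedown before the next vslide1down, rather than one slide per skipped element. A hypothetical sequence for a build_vector of {a0, undef, undef, a1} under that scheme (offsets and registers illustrative):

    vsetivli zero, 4, e32, m1, ta, ma
    vmv.v.x v8, a0               # splat element 0; no vector source to depend on
    vslidedown.vi v8, v8, 2      # cover both undef elements with one slide
    vslide1down.vx v8, v8, a1    # slide in the final element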
+2 −2
@@ -495,7 +495,7 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v4i16:
; RV32ELEN32:       # %bb.0:
; RV32ELEN32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT:    vmv.v.x v8, a0
; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT:    ret
;
@@ -530,7 +530,7 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v2i32:
; RV32ELEN32:       # %bb.0:
; RV32ELEN32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT:    vmv.v.x v8, a0
; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT:    ret
;
+29 −39
@@ -6,7 +6,7 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -30,7 +30,7 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
@@ -67,7 +67,7 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -91,7 +91,7 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -115,7 +115,7 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -152,7 +152,7 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -176,7 +176,7 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -200,7 +200,7 @@ define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
@@ -226,7 +226,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    addi a3, a3, 308
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
@@ -239,7 +239,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    addiw a3, a3, 308
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
@@ -263,7 +263,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    addi a3, a3, 308
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
@@ -276,7 +276,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    addiw a3, a3, 308
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
@@ -293,25 +293,15 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
}

define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_identity2:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, 23
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs_identity2:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 23
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs_identity2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, 23
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
@@ -324,7 +314,7 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -348,7 +338,7 @@ define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT:    vle32.v v9, (a0)
@@ -377,7 +367,7 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
; RV32-NEXT:    add a2, a2, a6
; RV32-NEXT:    add a3, a3, a7
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
@@ -385,12 +375,12 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
;
; RV64-LABEL: add_general_rhs:
; RV64:       # %bb.0:
-; RV64-NEXT:    addw a0, a0, a4
+; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a5
; RV64-NEXT:    addw a2, a2, a6
; RV64-NEXT:    addw a3, a3, a7
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
@@ -414,7 +404,7 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
@@ -422,12 +412,12 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
;
; RV64-LABEL: add_general_splat:
; RV64:       # %bb.0:
-; RV64-NEXT:    addw a0, a0, a4
+; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a4
; RV64-NEXT:    addw a2, a2, a4
; RV64-NEXT:    addw a3, a3, a4
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
+6 −6
@@ -252,7 +252,7 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
; CHECK-LABEL: buildvec_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    ret
  %v1 = insertelement <2 x half> poison, half %a, i64 0
@@ -264,7 +264,7 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
; CHECK-LABEL: buildvec_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    ret
  %v1 = insertelement <2 x float> poison, float %a, i64 0
@@ -276,7 +276,7 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    ret
  %v1 = insertelement <2 x double> poison, double %a, i64 0
@@ -288,7 +288,7 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    ret
  %v1 = insertelement <2 x double> poison, double %b, i64 1
@@ -300,7 +300,7 @@ define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: buildvec_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
@@ -316,7 +316,7 @@ define <8 x float> @buildvec_v8f32(float %e0, float %e1, float %e2, float %e3, f
; CHECK-LABEL: buildvec_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+30 −36
@@ -211,7 +211,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV32-NEXT:    fcvt.w.d a3, fa5, rtz
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vse8.v v8, (a1)
; RV32-NEXT:    ret
@@ -234,13 +234,13 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vfmv.f.s fa5, v8
; RV64-NEXT:    feq.d a2, fa5, fa5
-; RV64-NEXT:    neg a2, a2
+; RV64-NEXT:    negw a2, a2
; RV64-NEXT:    fmax.d fa5, fa5, fa4
; RV64-NEXT:    fmin.d fa5, fa5, fa3
; RV64-NEXT:    fcvt.l.d a3, fa5, rtz
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vse8.v v8, (a1)
; RV64-NEXT:    ret
@@ -256,23 +256,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; RV32-NEXT:    vfmv.f.s fa4, v8
+; RV32-NEXT:    vfmv.f.s fa4, v9
; RV32-NEXT:    fcvt.d.w fa3, zero
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vslide1down.vx v9, v8, a0
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vfmv.f.s fa4, v8
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa5, fa4, fa5
-; RV32-NEXT:    fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v9, a0
+; RV32-NEXT:    fcvt.wu.d a2, fa5, rtz
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vse8.v v8, (a1)
; RV32-NEXT:    ret
;
@@ -280,23 +278,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
; RV64-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
-; RV64-NEXT:    vfmv.f.s fa4, v8
+; RV64-NEXT:    vfmv.f.s fa4, v9
; RV64-NEXT:    fmv.d.x fa3, zero
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vslide1down.vx v9, v8, a0
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vfmv.f.s fa4, v8
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa5, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v9, a0
+; RV64-NEXT:    fcvt.lu.d a2, fa5, rtz
+; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vse8.v v8, (a1)
; RV64-NEXT:    ret
  %a = load <2 x double>, ptr %x
@@ -344,7 +340,7 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a2
+; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
@@ -437,13 +433,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vfmv.f.s fa3, v8
; RV64-NEXT:    feq.d a2, fa3, fa3
-; RV64-NEXT:    neg a2, a2
+; RV64-NEXT:    negw a2, a2
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a2
+; RV64-NEXT:    vmv.v.x v10, a2
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
@@ -529,22 +525,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse64.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; RV32-NEXT:    vfmv.f.s fa4, v8
+; RV32-NEXT:    vfmv.f.s fa4, v10
; RV32-NEXT:    fcvt.d.w fa3, zero
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a0
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v11, v8, 1
-; RV32-NEXT:    vfmv.f.s fa4, v11
+; RV32-NEXT:    vfmv.f.s fa4, v8
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
-; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
+; RV32-NEXT:    fcvt.wu.d a2, fa4, rtz
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
@@ -604,22 +599,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse64.v v8, (a0)
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
; RV64-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
-; RV64-NEXT:    vfmv.f.s fa4, v8
+; RV64-NEXT:    vfmv.f.s fa4, v10
; RV64-NEXT:    fmv.d.x fa3, zero
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a0
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v11, v8, 1
-; RV64-NEXT:    vfmv.f.s fa4, v11
+; RV64-NEXT:    vfmv.f.s fa4, v8
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    fcvt.lu.d a2, fa4, rtz
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a2
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2