Unverified Commit bd675f58 authored by Luke Lau's avatar Luke Lau Committed by GitHub
Browse files

[RISCV] Reduce LMUL when index is known when lowering insert_vector_elt (#66087)

Continuing on from #65997, if the index of insert_vector_elt is a
constant then we can work out the minimum number of registers that will
be needed for the slideup and choose a smaller type to operate on.

This reduces the LMUL not just for the slideup but also for the scalar
insert.
parent 8e6db7e2
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -7458,6 +7458,19 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }
  MVT OrigContainerVT = ContainerVT;
  SDValue OrigVec = Vec;
  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
                                              DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getVectorIdxConstant(0, DL));
    }
  }
  MVT XLenVT = Subtarget.getXLenVT();
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -7482,6 +7495,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (ContainerVT != OrigContainerVT)
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Vec, DAG.getVectorIdxConstant(0, DL));
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -7514,6 +7531,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
      if (ContainerVT != OrigContainerVT)
        ValInVec =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                        ValInVec, DAG.getVectorIdxConstant(0, DL));
      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -7544,6 +7565,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);
  if (ContainerVT != OrigContainerVT)
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Slideup, DAG.getVectorIdxConstant(0, DL));
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
+1 −1
Original line number Diff line number Diff line
@@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vsetivli zero, 2, e8, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v12, 1
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 1
+7 −7
Original line number Diff line number Diff line
@@ -40,7 +40,7 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 0
@@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vsetivli zero, 5, e32, m8, tu, ma
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v16, 4
; CHECK-NEXT:    ret
  %b = insertelement <32 x i32> %a, i32 %y, i32 4
@@ -92,7 +92,7 @@ define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <64 x i32> %a, i32 %y, i32 0
@@ -390,7 +390,7 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 -1, i32 0
@@ -468,7 +468,7 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 6
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
  %b = insertelement <8 x i64> %a, i64 6, i32 0
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a2, 6
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v12, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vse64.v v8, (a0)
+146 −146

File changed.

Preview size limit exceeded, changes collapsed.

+17 −17
Original line number Diff line number Diff line
@@ -109,7 +109,7 @@ define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %
define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv8f16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
@@ -119,7 +119,7 @@ define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %el
define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv8f16_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
@@ -143,7 +143,7 @@ define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %
define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv16f16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
@@ -153,7 +153,7 @@ define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half
define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv16f16_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vslideup.vi v8, v12, 3
; CHECK-NEXT:    ret
@@ -177,7 +177,7 @@ define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, hal
define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv32f16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
@@ -187,7 +187,7 @@ define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half
define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) {
; CHECK-LABEL: insertelt_nxv32f16_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, m8, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vslideup.vi v8, v16, 3
; CHECK-NEXT:    ret
@@ -279,7 +279,7 @@ define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, floa
define <vscale x 4 x float> @insertelt_nxv4f32_0(<vscale x 4 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv4f32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 4 x float> %v, float %elt, i32 0
@@ -289,7 +289,7 @@ define <vscale x 4 x float> @insertelt_nxv4f32_0(<vscale x 4 x float> %v, float
define <vscale x 4 x float> @insertelt_nxv4f32_imm(<vscale x 4 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv4f32_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
@@ -313,7 +313,7 @@ define <vscale x 4 x float> @insertelt_nxv4f32_idx(<vscale x 4 x float> %v, floa
define <vscale x 8 x float> @insertelt_nxv8f32_0(<vscale x 8 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv8f32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 8 x float> %v, float %elt, i32 0
@@ -323,7 +323,7 @@ define <vscale x 8 x float> @insertelt_nxv8f32_0(<vscale x 8 x float> %v, float
define <vscale x 8 x float> @insertelt_nxv8f32_imm(<vscale x 8 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv8f32_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vslideup.vi v8, v12, 3
; CHECK-NEXT:    ret
@@ -347,7 +347,7 @@ define <vscale x 8 x float> @insertelt_nxv8f32_idx(<vscale x 8 x float> %v, floa
define <vscale x 16 x float> @insertelt_nxv16f32_0(<vscale x 16 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv16f32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 16 x float> %v, float %elt, i32 0
@@ -357,7 +357,7 @@ define <vscale x 16 x float> @insertelt_nxv16f32_0(<vscale x 16 x float> %v, flo
define <vscale x 16 x float> @insertelt_nxv16f32_imm(<vscale x 16 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv16f32_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m8, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vslideup.vi v8, v16, 3
; CHECK-NEXT:    ret
@@ -415,7 +415,7 @@ define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, do
define <vscale x 2 x double> @insertelt_nxv2f64_0(<vscale x 2 x double> %v, double %elt) {
; CHECK-LABEL: insertelt_nxv2f64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 2 x double> %v, double %elt, i32 0
@@ -449,7 +449,7 @@ define <vscale x 2 x double> @insertelt_nxv2f64_idx(<vscale x 2 x double> %v, do
define <vscale x 4 x double> @insertelt_nxv4f64_0(<vscale x 4 x double> %v, double %elt) {
; CHECK-LABEL: insertelt_nxv4f64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 4 x double> %v, double %elt, i32 0
@@ -459,7 +459,7 @@ define <vscale x 4 x double> @insertelt_nxv4f64_0(<vscale x 4 x double> %v, doub
define <vscale x 4 x double> @insertelt_nxv4f64_imm(<vscale x 4 x double> %v, double %elt) {
; CHECK-LABEL: insertelt_nxv4f64_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m4, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vslideup.vi v8, v12, 3
; CHECK-NEXT:    ret
@@ -483,7 +483,7 @@ define <vscale x 4 x double> @insertelt_nxv4f64_idx(<vscale x 4 x double> %v, do
define <vscale x 8 x double> @insertelt_nxv8f64_0(<vscale x 8 x double> %v, double %elt) {
; CHECK-LABEL: insertelt_nxv8f64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, tu, ma
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
  %r = insertelement <vscale x 8 x double> %v, double %elt, i32 0
@@ -493,7 +493,7 @@ define <vscale x 8 x double> @insertelt_nxv8f64_0(<vscale x 8 x double> %v, doub
define <vscale x 8 x double> @insertelt_nxv8f64_imm(<vscale x 8 x double> %v, double %elt) {
; CHECK-LABEL: insertelt_nxv8f64_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, ma
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT:    vfmv.s.f v16, fa0
; CHECK-NEXT:    vslideup.vi v8, v16, 3
; CHECK-NEXT:    ret
Loading