Unverified commit 8b8c271f, authored by Dhruva and committed by GitHub
Browse files

[DAG] computeKnownFPClass - add ISD::EXTRACT_SUBVECTOR/INSERT_SUBVECTOR handling. (#190378)

This patch implements `ISD::EXTRACT_SUBVECTOR` and
`ISD::INSERT_SUBVECTOR` handling in `SelectionDAG::computeKnownFPClass`.
Fixes: #189586
parent 857baf37
Loading
Loading
Loading
Loading
+44 −0
Original line number Diff line number Diff line
@@ -6159,6 +6159,50 @@ KnownFPClass SelectionDAG::computeKnownFPClass(SDValue Op,
    Known.KnownFPClasses &= ~AssertedClasses;
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // The result's FP classes come from the vector being extracted from
    // (operand 0).
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();

    // When the source is not a fixed-length vector, query it without an
    // explicit element mask.
    if (!VecVT.isFixedLengthVector()) {
      Known = computeKnownFPClass(Vec, InterestedClasses, Depth + 1);
      break;
    }

    // Fixed-length source: widen the demanded-lane mask to the source's lane
    // count and shift it up by the constant start index (operand 1), so that
    // only the source lanes feeding the demanded result lanes are queried.
    unsigned FirstElt = Op.getConstantOperandVal(1);
    unsigned NumVecElts = VecVT.getVectorNumElements();
    APInt DemandedVecElts =
        DemandedElts.zextOrTrunc(NumVecElts).shl(FirstElt);
    Known = computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses,
                                Depth + 1);
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Operand 0 is the vector being inserted into, operand 1 is the inserted
    // subvector, and operand 2 is the constant start index.
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT BaseTy = Base.getValueType();

    // When the base is not a fixed-length vector the demanded lanes are not
    // split between the operands: query both without an element mask and
    // merge the results. The base query is skipped once the subvector result
    // is already fully unknown.
    if (!BaseTy.isFixedLengthVector()) {
      Known = computeKnownFPClass(Sub, InterestedClasses, Depth + 1);
      if (Known.isUnknown())
        break;
      Known |= computeKnownFPClass(Base, InterestedClasses, Depth + 1);
      break;
    }

    unsigned FirstElt = Op.getConstantOperandVal(2);
    unsigned NumBaseLanes = BaseTy.getVectorNumElements();
    unsigned NumSubLanes = Sub.getValueType().getVectorNumElements();

    // Lanes [FirstElt, FirstElt + NumSubLanes) of the result are taken from
    // the subvector; every other lane is taken from the base.
    APInt SubLaneMask =
        APInt::getBitsSet(NumBaseLanes, FirstElt, FirstElt + NumSubLanes);
    APInt DemandedBaseElts = DemandedElts & ~SubLaneMask;
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubLanes, FirstElt);

    const bool NeedsBase = !DemandedBaseElts.isZero();
    const bool NeedsSub = !DemandedSubElts.isZero();

    // Query only the operands that contribute at least one demanded lane.
    if (NeedsBase)
      Known = computeKnownFPClass(Base, DemandedBaseElts, InterestedClasses,
                                  Depth + 1);
    if (NeedsSub) {
      KnownFPClass SubKnown = computeKnownFPClass(
          Sub, DemandedSubElts, InterestedClasses, Depth + 1);
      // With both operands contributing, merge the two results; otherwise
      // the subvector alone determines the answer.
      if (NeedsBase)
        Known |= SubKnown;
      else
        Known = SubKnown;
    }
    break;
  }
  case ISD::SELECT:
  case ISD::VSELECT: {
    // TODO: Add adjustKnownFPClassForSelectArm clamp recognition as in
+12 −45
Original line number Diff line number Diff line
@@ -333,12 +333,8 @@ define i1 @copysign_unknown_sign_no_fold(float %x, float %y) nounwind {
define <vscale x 4 x i1> @extract_subvec_scalable_isneg_false(<vscale x 8 x float> %a0) {
; CHECK-LABEL: extract_subvec_scalable_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %abs = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> %a0)
  %sub = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %abs, i64 0)
@@ -349,12 +345,8 @@ define <vscale x 4 x i1> @extract_subvec_scalable_isneg_false(<vscale x 8 x floa
define <2 x i1> @extract_subvec_fixed_isneg_false(<4 x float> %a0) {
; CHECK-LABEL: extract_subvec_fixed_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a0)
  %sub = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %abs, i64 0)
@@ -365,14 +357,8 @@ define <2 x i1> @extract_subvec_fixed_isneg_false(<4 x float> %a0) {
define <vscale x 4 x i1> @insert_subvec_scalable_both_isneg_false(<vscale x 4 x float> %base, <vscale x 2 x float> %sub) {
; CHECK-LABEL: insert_subvec_scalable_both_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v10
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %absbase = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %base)
  %abssub  = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %sub)
@@ -384,16 +370,8 @@ define <vscale x 4 x i1> @insert_subvec_scalable_both_isneg_false(<vscale x 4 x
define <4 x i1> @insert_subvec_fixed_both_isneg_false(<4 x float> %base, <2 x float> %sub) {
; CHECK-LABEL: insert_subvec_fixed_both_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v9
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %absbase = call <4 x float> @llvm.fabs.v4f32(<4 x float> %base)
  %abssub  = call <2 x float> @llvm.fabs.v2f32(<2 x float> %sub)
@@ -405,12 +383,8 @@ define <4 x i1> @insert_subvec_fixed_both_isneg_false(<4 x float> %base, <2 x fl
define <2 x i1> @extract_subvec_fixed_mixed_to_pos_isneg_false(<4 x float> %a) {
; CHECK-LABEL: extract_subvec_fixed_mixed_to_pos_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  %neg = fneg <4 x float> %abs
@@ -463,15 +437,8 @@ define <4 x i1> @insert_subvec_fixed_pos_base_neg_sub_isneg_unknown(<4 x float>
define <4 x i1> @insert_subvec_fixed_pos_base_pos_sub_isneg_false(<4 x float> %base, <2 x float> %sub) {
; CHECK-LABEL: insert_subvec_fixed_pos_base_pos_sub_isneg_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vfabs.v v9, v9
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vfclass.v v8, v8
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmclr.m v0
; CHECK-NEXT:    ret
  %abs_base = call <4 x float> @llvm.fabs.v4f32(<4 x float> %base)
  %abs_sub = call <2 x float> @llvm.fabs.v2f32(<2 x float> %sub)