Commit f67d93dc authored by David Green's avatar David Green
Browse files

[ARM] Constant long shift combines

This changes the way that asrl and lsll intrinsics are lowered, going
via the ISEL ASRL and LSLL nodes instead of straight to machine nodes.
On top of that, it adds some constant folds for long shifts, in case it
turns out that the shift amount was either constant or 0.

Differential Revision: https://reviews.llvm.org/D75553
parent ecd3e678
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -4681,12 +4681,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;
    case Intrinsic::arm_mve_lsll:
      SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
      return;
    case Intrinsic::arm_mve_asrl:
      SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
+38 −0
Original line number Diff line number Diff line
@@ -3808,6 +3808,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
  case Intrinsic::arm_mve_vreinterpretq:
    return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::arm_mve_lsll:
    return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::arm_mve_asrl:
    return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
}
@@ -14138,6 +14144,34 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
  return SDValue();
}
/// Fold ARMISD long shifts (ASRL / LSRL / LSLL) whose shift amount, operand 2,
/// is a constant.
///
///  - An amount of 0 rewires every use of \p N to a merge of the two unshifted
///    input operands (operands 0 and 1).
///  - An amount in [-32, -1] is rebuilt as a shift by the negated amount using
///    the other opcode: LSLL becomes LSRL, anything else becomes LSLL. Such
///    negative amounts can come up from uses of the intrinsics.
///
/// \returns the replacement shift in the negative-amount case, and an empty
/// SDValue otherwise (including the zero case, where the uses have already
/// been replaced by the time this returns).
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
  // Nothing to fold unless the shift amount is a compile-time constant.
  auto *AmtNode = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!AmtNode)
    return SDValue();

  SDLoc Loc(N);
  SDValue In0 = N->getOperand(0);
  SDValue In1 = N->getOperand(1);
  int Amt = AmtNode->getSExtValue();

  // Shift by zero: forward both inputs unchanged to all users of N.
  if (Amt == 0) {
    SDValue Passthrough = DAG.getMergeValues({In0, In1}, Loc);
    DAG.ReplaceAllUsesWith(N, Passthrough.getNode());
    return SDValue();
  }

  // Only negative amounts down to -32 are rewritten; leave the rest alone.
  if (Amt >= 0 || Amt < -32)
    return SDValue();

  // X << -C becomes X >> C, and vice versa, with a now-positive constant.
  unsigned FlippedOpc = ARMISD::LSLL;
  if (N->getOpcode() == ARMISD::LSLL)
    FlippedOpc = ARMISD::LSRL;
  SDValue PositiveAmt = DAG.getConstant(-Amt, Loc, MVT::i32);
  SDValue Flipped = DAG.getNode(FlippedOpc, Loc, N->getVTList(), In0, In1,
                                PositiveAmt);
  DAG.ReplaceAllUsesWith(N, Flipped.getNode());
  return Flipped;
}
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@@ -15033,6 +15067,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
    return PerformVCMPCombine(N, DCI, Subtarget);
  case ISD::VECREDUCE_ADD:
    return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
  case ARMISD::ASRL:
  case ARMISD::LSRL:
  case ARMISD::LSLL:
    return PerformLongShiftCombine(N, DCI.DAG);
  case ARMISD::SMULWB: {
    unsigned BitWidth = N->getValueType(0).getSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+8 −20
Original line number Diff line number Diff line
@@ -7,8 +7,6 @@ declare {i32, i32} @llvm.arm.mve.lsll(i32, i32, i32)
define i64 @asrl_0(i64 %X) {
; CHECK-LABEL: asrl_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #0
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -27,8 +25,7 @@ entry:
define i64 @asrl_23(i64 %X) {
; CHECK-LABEL: asrl_23:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #23
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #23
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -47,8 +44,7 @@ entry:
define i64 @asrl_32(i64 %X) {
; CHECK-LABEL: asrl_32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -127,8 +123,7 @@ entry:
define i64 @asrl_m2(i64 %X) {
; CHECK-LABEL: asrl_m2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #1
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -147,8 +142,7 @@ entry:
define i64 @asrl_m32(i64 %X) {
; CHECK-LABEL: asrl_m32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -210,8 +204,6 @@ entry:
define i64 @lsll_0(i64 %X) {
; CHECK-LABEL: lsll_0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #0
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -230,8 +222,7 @@ entry:
define i64 @lsll_23(i64 %X) {
; CHECK-LABEL: lsll_23:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #23
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #23
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -250,8 +241,7 @@ entry:
define i64 @lsll_32(i64 %X) {
; CHECK-LABEL: lsll_32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -330,8 +320,7 @@ entry:
define i64 @lsll_m2(i64 %X) {
; CHECK-LABEL: lsll_m2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #1
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -350,8 +339,7 @@ entry:
define i64 @lsll_m32(i64 %X) {
; CHECK-LABEL: lsll_m32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
+32 −64
Original line number Diff line number Diff line
@@ -7,8 +7,7 @@ declare {i32, i32} @llvm.arm.mve.lsll(i32, i32, i32)
define i32 @ashr_demand_bottom3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -28,8 +27,7 @@ entry:
define i32 @lsll_demand_bottom3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -49,8 +47,7 @@ entry:
define i32 @ashr_demand_bottomm3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -70,8 +67,7 @@ entry:
define i32 @lsll_demand_bottomm3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -92,8 +88,7 @@ entry:
define i32 @ashr_demand_bottom31(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #31
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -113,8 +108,7 @@ entry:
define i32 @lsll_demand_bottom31(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #31
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -134,8 +128,7 @@ entry:
define i32 @ashr_demand_bottomm31(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #30
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #31
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -155,8 +148,7 @@ entry:
define i32 @lsll_demand_bottomm31(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #30
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #31
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -177,8 +169,7 @@ entry:
define i32 @ashr_demand_bottom32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottom32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -198,8 +189,7 @@ entry:
define i32 @lsll_demand_bottom32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottom32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -219,8 +209,7 @@ entry:
define i32 @ashr_demand_bottomm32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottomm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -240,8 +229,7 @@ entry:
define i32 @lsll_demand_bottomm32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottomm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #32
; CHECK-NEXT:    bx lr
entry:
  %0 = lshr i64 %X, 32
@@ -352,8 +340,7 @@ entry:
define i32 @ashr_demand_top3(i64 %X) {
; CHECK-LABEL: ashr_demand_top3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #3
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -375,8 +362,7 @@ entry:
define i32 @lsll_demand_top3(i64 %X) {
; CHECK-LABEL: lsll_demand_top3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -398,8 +384,7 @@ entry:
define i32 @ashr_demand_topm3(i64 %X) {
; CHECK-LABEL: ashr_demand_topm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -421,8 +406,7 @@ entry:
define i32 @lsll_demand_topm3(i64 %X) {
; CHECK-LABEL: lsll_demand_topm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #3
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -445,8 +429,7 @@ entry:
define i32 @ashr_demand_top31(i64 %X) {
; CHECK-LABEL: ashr_demand_top31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #31
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -468,8 +451,7 @@ entry:
define i32 @lsll_demand_top31(i64 %X) {
; CHECK-LABEL: lsll_demand_top31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #31
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -491,8 +473,7 @@ entry:
define i32 @ashr_demand_topm31(i64 %X) {
; CHECK-LABEL: ashr_demand_topm31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #30
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #31
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -514,8 +495,7 @@ entry:
define i32 @lsll_demand_topm31(i64 %X) {
; CHECK-LABEL: lsll_demand_topm31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #30
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #31
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -538,8 +518,7 @@ entry:
define i32 @ashr_demand_top32(i64 %X) {
; CHECK-LABEL: ashr_demand_top32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -561,8 +540,7 @@ entry:
define i32 @lsll_demand_top32(i64 %X) {
; CHECK-LABEL: lsll_demand_top32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -584,8 +562,7 @@ entry:
define i32 @ashr_demand_topm32(i64 %X) {
; CHECK-LABEL: ashr_demand_topm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -607,8 +584,7 @@ entry:
define i32 @lsll_demand_topm32(i64 %X) {
; CHECK-LABEL: lsll_demand_topm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
@@ -725,8 +701,7 @@ entry:
define i32 @ashr_demand_bottommask3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommask3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #3
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -748,8 +723,7 @@ entry:
define i32 @lsll_demand_bottommask3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommask3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -771,8 +745,7 @@ entry:
define i32 @ashr_demand_bottommaskm3(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommaskm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #3
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -794,8 +767,7 @@ entry:
define i32 @lsll_demand_bottommaskm3(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommaskm3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #2
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #3
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -818,8 +790,7 @@ entry:
define i32 @ashr_demand_bottommask32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommask32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    asrl r0, r1, #32
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -841,8 +812,7 @@ entry:
define i32 @lsll_demand_bottommask32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommask32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r2, #32
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -864,8 +834,7 @@ entry:
define i32 @ashr_demand_bottommaskm32(i64 %X) {
; CHECK-LABEL: ashr_demand_bottommaskm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    asrl r0, r1, r2
; CHECK-NEXT:    lsll r0, r1, #32
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry:
@@ -887,8 +856,7 @@ entry:
define i32 @lsll_demand_bottommaskm32(i64 %X) {
; CHECK-LABEL: lsll_demand_bottommaskm32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mvn r2, #31
; CHECK-NEXT:    lsll r0, r1, r2
; CHECK-NEXT:    lsrl r0, r1, #32
; CHECK-NEXT:    bic r0, r0, #1
; CHECK-NEXT:    bx lr
entry: