Commit 69bc0990 authored by Simon Pilgrim's avatar Simon Pilgrim
Browse files

[DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE (REAPPLIED).

Add DemandedElts support inside the TRUNCATE analysis.

REAPPLIED - this was reverted by @hans at rGa51226057fc3 due to an issue with vector shift amount types, which was fixed in rG935bacd3a724 and an additional test case added at rG0ca81b90d19d

Differential Revision: https://reviews.llvm.org/D56387
parent 294e2544
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -11952,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  }
  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);
  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+5 −4
Original line number Diff line number Diff line
@@ -1986,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits(
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

@@ -2009,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits(
          // undesirable.
          break;

        SDValue ShAmt = Src.getOperand(1);
        auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
        if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC)
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

+1 −0
Original line number Diff line number Diff line
@@ -3399,6 +3399,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
  return N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ANY_EXTEND ||
         isExtendedBUILD_VECTOR(N, DAG, true);
}
+18 −18
Original line number Diff line number Diff line
@@ -96,7 +96,7 @@ define <8 x i16> @amull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, <8 x i8>* %A
@@ -113,7 +113,7 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -131,7 +131,7 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -258,7 +258,7 @@ define <8 x i16> @amlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlal v0.8h, v1.8b, v2.8b
; CHECK-NEXT:    smlal v0.8h, v1.8b, v2.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
@@ -278,7 +278,7 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -299,7 +299,7 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlal v0.2d, v1.2s, v2.2s
; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -428,7 +428,7 @@ define <8 x i16> @amlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlsl v0.8h, v1.8b, v2.8b
; CHECK-NEXT:    smlsl v0.8h, v1.8b, v2.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
@@ -448,7 +448,7 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlsl v0.4s, v1.4h, v2.4h
; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -469,7 +469,7 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr d2, [x2]
; CHECK-NEXT:    umlsl v0.2d, v1.2s, v2.2s
; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -586,7 +586,7 @@ define <8 x i16> @amull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; CHECK-LABEL: amull_extvec_v8i8_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8b, #12
; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %tmp3 = zext <8 x i8> %arg to <8 x i16>
@@ -600,7 +600,7 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1234
; CHECK-NEXT:    dup v1.4h, w8
; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -615,7 +615,7 @@ define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1234
; CHECK-NEXT:    dup v1.2s, w8
; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
@@ -752,8 +752,8 @@ define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
; CHECK-LABEL: amull2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umull v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    umull2 v1.8h, v0.16b, v1.16b
; CHECK-NEXT:    smull v2.8h, v0.8b, v1.8b
; CHECK-NEXT:    smull2 v1.8h, v0.16b, v1.16b
; CHECK-NEXT:    bic v2.8h, #255, lsl #8
; CHECK-NEXT:    bic v1.8h, #255, lsl #8
; CHECK-NEXT:    mov v0.16b, v2.16b
@@ -768,8 +768,8 @@ define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
; CHECK-LABEL: amull2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umull v2.4s, v0.4h, v1.4h
; CHECK-NEXT:    umull2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    smull v2.4s, v0.4h, v1.4h
; CHECK-NEXT:    smull2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    movi v3.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v1.16b, v0.16b, v3.16b
; CHECK-NEXT:    and v0.16b, v2.16b, v3.16b
@@ -784,8 +784,8 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
; CHECK-LABEL: amull2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umull v2.2d, v0.2s, v1.2s
; CHECK-NEXT:    umull2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    smull v2.2d, v0.2s, v1.2s
; CHECK-NEXT:    smull2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    movi v3.2d, #0x000000ffffffff
; CHECK-NEXT:    and v1.16b, v0.16b, v3.16b
; CHECK-NEXT:    and v0.16b, v2.16b, v3.16b
+11 −11
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ define <4 x i16> @mlai16_trunc(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2
; CHECK-LABEL: mlai16_trunc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    saddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
entry:
@@ -21,7 +21,7 @@ entry:
define <4 x i32> @mlai16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
; CHECK-LABEL: mlai16_and:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
@@ -43,7 +43,7 @@ define void @mlai16_loadstore(i16* %a, i16* %b, i16* %c) {
; CHECK-NEXT:    ldr d1, [x1, #16]
; CHECK-NEXT:    ldr d2, [x2, #16]
; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    saddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    str d0, [x0, #16]
; CHECK-NEXT:    ret
@@ -89,8 +89,8 @@ entry:
define <4 x i32> @addmuli16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
; CHECK-LABEL: addmuli16_and:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT:    umlal v1.4s, v0.4h, v2.4h
; CHECK-NEXT:    smull v1.4s, v1.4h, v2.4h
; CHECK-NEXT:    smlal v1.4s, v0.4h, v2.4h
; CHECK-NEXT:    movi v0.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
@@ -141,7 +141,7 @@ define <2 x i32> @mlai32_trunc(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2
; CHECK-LABEL: mlai32_trunc:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    saddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT:    uaddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
entry:
@@ -157,7 +157,7 @@ entry:
define <2 x i64> @mlai32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) {
; CHECK-LABEL: mlai32_and:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    uaddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
@@ -179,7 +179,7 @@ define void @mlai32_loadstore(i32* %a, i32* %b, i32* %c) {
; CHECK-NEXT:    ldr d1, [x1, #32]
; CHECK-NEXT:    ldr d2, [x2, #32]
; CHECK-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    saddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT:    uaddw v0.2d, v0.2d, v2.2s
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    str d0, [x0, #32]
; CHECK-NEXT:    ret
@@ -225,8 +225,8 @@ entry:
define <2 x i64> @addmuli32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) {
; CHECK-LABEL: addmuli32_and:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT:    umlal v1.2d, v0.2s, v2.2s
; CHECK-NEXT:    smull v1.2d, v1.2s, v2.2s
; CHECK-NEXT:    smlal v1.2d, v0.2s, v2.2s
; CHECK-NEXT:    movi v0.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
@@ -359,7 +359,7 @@ define void @func2(i16* %a, i16* %b, i16* %c) {
; CHECK-NEXT:    str d1, [x1, #16]
; CHECK-NEXT:    ldr d1, [x2, #16]
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT:    saddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    str d0, [x0, #16]
; CHECK-NEXT:    ret
Loading