Unverified Commit 796d2ec4 authored by Simon Pilgrim, committed by GitHub
Browse files

[DAG] visitAND - attempt to fold (and buildvector(), buildvector()) -> buildvector() (#193987)

See if we can fold all elements of an AND of buildvectors: AND(-1,X) -> X, AND(0,X) -> 0, etc.

Companion to #183032
parent 13e98d83
Loading
Loading
Loading
Loading
+34 −0
Original line number Diff line number Diff line
@@ -7663,6 +7663,40 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      return N0;
    // fold (and buildvector(x,0,-1,w), buildvector(0,y,z,w))
    // --> buildvector(0,0,z,w)
    auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
    auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
    if (BV0 && BV1 && !BV0->getSplatValue() && !BV1->getSplatValue() &&
        N0.hasOneUse() && N1.hasOneUse() &&
        BV0->getOperand(0).getValueType() ==
            BV1->getOperand(0).getValueType()) {
      SmallVector<SDValue> MergedOps;
      unsigned NumElts = VT.getVectorNumElements();
      EVT EltVT = BV0->getOperand(0).getValueType();
      for (unsigned I = 0; I != NumElts; ++I) {
        auto *C0 = dyn_cast<ConstantSDNode>(BV0->getOperand(I));
        auto *C1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
        if (C0 && C1)
          MergedOps.push_back(DAG.getConstant(
              C0->getAPIntValue() & C1->getAPIntValue(), DL, EltVT));
        else if (C0 && C0->isZero())
          MergedOps.push_back(BV0->getOperand(I));
        else if (C1 && C1->isZero())
          MergedOps.push_back(BV1->getOperand(I));
        else if (C0 && C0->isAllOnes())
          MergedOps.push_back(BV1->getOperand(I));
        else if (C1 && C1->isAllOnes())
          MergedOps.push_back(BV0->getOperand(I));
        else if (BV0->getOperand(I) == BV1->getOperand(I))
          MergedOps.push_back(BV0->getOperand(I));
        else
          break;
      }
      if (MergedOps.size() == NumElts)
        return DAG.getBuildVector(VT, DL, MergedOps);
    }
    // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
    bool Frozen = N0.getOpcode() == ISD::FREEZE;
    auto *MLoad = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0);
+12 −14
Original line number Diff line number Diff line
@@ -2668,15 +2668,14 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-LABEL: masked_load_zext_v3i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s0, w2
; CHECK-NEXT:    fmov s1, w1
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    fmov s0, wzr
; CHECK-NEXT:    fmov s1, w3
; CHECK-NEXT:    fmov s2, w2
; CHECK-NEXT:    fmov s3, w1
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    zip1 z0.h, z1.h, z0.h
; CHECK-NEXT:    fmov s1, w3
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    zip1 z1.h, z3.h, z2.h
; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    sunpklo z0.s, z0.h
@@ -2741,15 +2740,14 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-LABEL: masked_load_sext_v3i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s0, w2
; CHECK-NEXT:    fmov s1, w1
; CHECK-NEXT:    adrp x8, .LCPI14_0
; CHECK-NEXT:    fmov s0, wzr
; CHECK-NEXT:    fmov s1, w3
; CHECK-NEXT:    fmov s2, w2
; CHECK-NEXT:    fmov s3, w1
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    zip1 z0.h, z1.h, z0.h
; CHECK-NEXT:    fmov s1, w3
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    zip1 z1.h, z3.h, z2.h
; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    sunpklo z0.s, z0.h
+10 −35
Original line number Diff line number Diff line
@@ -357,22 +357,10 @@ define void @and_zext_masks_v4i64(ptr %res, ptr %a, ptr %b) nounwind {
; LA32-NEXT:    xvld $xr0, $a1, 0
; LA32-NEXT:    xvld $xr1, $a2, 0
; LA32-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 0
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 1
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 2
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 3
; LA32-NEXT:    vldi $vr0, -1777
; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
; LA32-NEXT:    vextrins.w $vr1, $vr0, 2
; LA32-NEXT:    vextrins.w $vr1, $vr0, 35
; LA32-NEXT:    vextrins.w $vr0, $vr0, 33
; LA32-NEXT:    xvpermi.q $xr0, $xr1, 2
; LA32-NEXT:    xvrepli.d $xr1, 1
; LA32-NEXT:    xvand.v $xr0, $xr0, $xr1
; LA32-NEXT:    xvrepli.b $xr1, 0
; LA32-NEXT:    xvextrins.w $xr1, $xr0, 0
; LA32-NEXT:    xvrepli.d $xr0, 1
; LA32-NEXT:    xvand.v $xr0, $xr1, $xr0
; LA32-NEXT:    xvst $xr0, $a0, 0
; LA32-NEXT:    ret
;
@@ -440,25 +428,12 @@ define void @and_sext_masks_v4i64(ptr %res, ptr %a, ptr %b) nounwind {
; LA32-NEXT:    xvld $xr0, $a1, 0
; LA32-NEXT:    xvld $xr1, $a2, 0
; LA32-NEXT:    xvfcmp.clt.d $xr0, $xr0, $xr1
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 0
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 1
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 2
; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 3
; LA32-NEXT:    vldi $vr0, -1777
; LA32-NEXT:    vand.v $vr0, $vr1, $vr0
; LA32-NEXT:    vori.b $vr1, $vr0, 0
; LA32-NEXT:    vextrins.w $vr1, $vr1, 16
; LA32-NEXT:    vextrins.w $vr1, $vr0, 33
; LA32-NEXT:    vextrins.w $vr1, $vr0, 49
; LA32-NEXT:    vextrins.w $vr2, $vr0, 2
; LA32-NEXT:    vextrins.w $vr2, $vr0, 18
; LA32-NEXT:    vextrins.w $vr2, $vr0, 35
; LA32-NEXT:    vextrins.w $vr2, $vr0, 51
; LA32-NEXT:    xvpermi.q $xr1, $xr2, 2
; LA32-NEXT:    xvrepli.b $xr1, 0
; LA32-NEXT:    xvinsve0.w $xr1, $xr0, 0
; LA32-NEXT:    xvinsve0.w $xr1, $xr0, 1
; LA32-NEXT:    xvpickve.w $xr0, $xr0, 4
; LA32-NEXT:    xvinsve0.w $xr1, $xr0, 4
; LA32-NEXT:    xvinsve0.w $xr1, $xr0, 5
; LA32-NEXT:    xvst $xr1, $a0, 0
; LA32-NEXT:    ret
;
+2 −8
Original line number Diff line number Diff line
@@ -504,17 +504,11 @@ define <2 x i16> @test_and(<2 x i16> %a, <2 x i16> %b) #0 {
define <2 x i16> @test_and_computed(i16 %a) {
; COMMON-LABEL: test_and_computed(
; COMMON:       {
; COMMON-NEXT:    .reg .b16 %rs<4>;
; COMMON-NEXT:    .reg .b32 %r<4>;
; COMMON-NEXT:    .reg .b16 %rs<2>;
; COMMON-EMPTY:
; COMMON-NEXT:  // %bb.0:
; COMMON-NEXT:    ld.param.b16 %rs1, [test_and_computed_param_0];
; COMMON-NEXT:    mov.b16 %rs2, 0;
; COMMON-NEXT:    mov.b32 %r1, {%rs1, %rs2};
; COMMON-NEXT:    mov.b16 %rs3, 5;
; COMMON-NEXT:    mov.b32 %r2, {%rs1, %rs3};
; COMMON-NEXT:    and.b32 %r3, %r2, %r1;
; COMMON-NEXT:    st.param.b32 [func_retval0], %r3;
; COMMON-NEXT:    st.param.v2.b16 [func_retval0], {%rs1, 0};
; COMMON-NEXT:    ret;
  %ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
+30 −34
Original line number Diff line number Diff line
@@ -331,41 +331,37 @@ define <2 x i128> @sdiv_v2i128(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m) nounwi
define <3 x i10> @sdiv_v3i10(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m) {
; CHECK-LABEL: sdiv_v3i10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mtfprwz 0, 9
; CHECK-NEXT:    mtfprwz 1, 10
; CHECK-NEXT:    addis 9, 2, .LCPI7_0@toc@ha
; CHECK-NEXT:    addi 9, 9, .LCPI7_0@toc@l
; CHECK-NEXT:    mtvsrwz 38, 8
; CHECK-NEXT:    lbz 12, 96(1)
; CHECK-NEXT:    li 11, 0
; CHECK-NEXT:    mtfprwz 2, 7
; CHECK-NEXT:    rldimi 9, 10, 32, 0
; CHECK-NEXT:    mtvsrwz 33, 8
; CHECK-NEXT:    vspltisw 4, 11
; CHECK-NEXT:    xxleqv 38, 38, 38
; CHECK-NEXT:    vadduwm 4, 4, 4
; CHECK-NEXT:    lxvd2x 2, 0, 9
; CHECK-NEXT:    xxmrghw 35, 1, 0
; CHECK-NEXT:    rldimi 12, 11, 32, 0
; CHECK-NEXT:    addis 11, 2, .LCPI7_0@toc@ha
; CHECK-NEXT:    addi 11, 11, .LCPI7_0@toc@l
; CHECK-NEXT:    mtfprd 1, 12
; CHECK-NEXT:    lxvd2x 0, 0, 11
; CHECK-NEXT:    xxswapd 34, 0
; CHECK-NEXT:    mtfprwz 0, 6
; CHECK-NEXT:    lbz 6, 96(1)
; CHECK-NEXT:    mtfprwz 1, 7
; CHECK-NEXT:    mtvsrwz 32, 6
; CHECK-NEXT:    addis 6, 2, .LCPI7_1@toc@ha
; CHECK-NEXT:    addi 6, 6, .LCPI7_1@toc@l
; CHECK-NEXT:    xxswapd 34, 2
; CHECK-NEXT:    xxmrghw 37, 1, 0
; CHECK-NEXT:    mtfprwz 1, 4
; CHECK-NEXT:    lxvd2x 0, 0, 6
; CHECK-NEXT:    vperm 5, 6, 5, 2
; CHECK-NEXT:    mtvsrwz 38, 5
; CHECK-NEXT:    vslw 5, 5, 4
; CHECK-NEXT:    vsraw 5, 5, 4
; CHECK-NEXT:    vperm 3, 0, 3, 2
; CHECK-NEXT:    xxswapd 32, 0
; CHECK-NEXT:    xxmrghw 35, 2, 0
; CHECK-NEXT:    mtfprd 0, 9
; CHECK-NEXT:    vperm 3, 1, 3, 2
; CHECK-NEXT:    mtvsrwz 33, 5
; CHECK-NEXT:    vslw 3, 3, 4
; CHECK-NEXT:    vsraw 3, 3, 4
; CHECK-NEXT:    xxmrghd 37, 1, 0
; CHECK-NEXT:    mtfprwz 0, 3
; CHECK-NEXT:    xxland 35, 35, 32
; CHECK-NEXT:    xxleqv 32, 32, 32
; CHECK-NEXT:    vslw 3, 3, 0
; CHECK-NEXT:    vsraw 3, 3, 0
; CHECK-NEXT:    xxmrghw 33, 1, 0
; CHECK-NEXT:    vperm 1, 6, 1, 2
; CHECK-NEXT:    vspltisw 6, 1
; CHECK-NEXT:    xxsel 0, 38, 37, 35
; CHECK-NEXT:    vslw 3, 1, 4
; CHECK-NEXT:    mtfprwz 1, 4
; CHECK-NEXT:    vslw 5, 5, 6
; CHECK-NEXT:    vsraw 5, 5, 6
; CHECK-NEXT:    xxmrghw 32, 1, 0
; CHECK-NEXT:    vperm 0, 1, 0, 2
; CHECK-NEXT:    vspltisw 1, 1
; CHECK-NEXT:    xxsel 0, 33, 35, 37
; CHECK-NEXT:    vslw 3, 0, 4
; CHECK-NEXT:    vsraw 3, 3, 4
; CHECK-NEXT:    xxswapd 1, 0
; CHECK-NEXT:    xxsldwi 3, 0, 0, 1
@@ -374,10 +370,10 @@ define <3 x i10> @sdiv_v3i10(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m) {
; CHECK-NEXT:    xxsldwi 4, 35, 35, 1
; CHECK-NEXT:    mffprwz 4, 2
; CHECK-NEXT:    divw 3, 4, 3
; CHECK-NEXT:    mffprwz 4, 3
; CHECK-NEXT:    mffprwz 4, 4
; CHECK-NEXT:    mtfprwz 1, 3
; CHECK-NEXT:    mffprwz 3, 4
; CHECK-NEXT:    divw 3, 3, 4
; CHECK-NEXT:    mffprwz 3, 3
; CHECK-NEXT:    divw 3, 4, 3
; CHECK-NEXT:    mfvsrwz 4, 35
; CHECK-NEXT:    mtfprwz 2, 3
; CHECK-NEXT:    mffprwz 3, 0
Loading