Commit 2f4f8fcf authored by Craig Topper
Browse files

[X86] Don't add DELETED_NODES to DAG combine worklist after calling...

[X86] Don't add DELETED_NODES to DAG combine worklist after calling SimplifyDemandedBits/SimplifyDemandedVectorElts.

These AddToWorklist calls were added in 84cd968f.
It's possible that the SimplifyDemandedBits/SimplifyDemandedVectorElts
call triggered CSE that deleted N. Detect that case and avoid adding N
to the worklist.

Fixes PR45067.
parent 9569a147
Loading
Loading
Loading
Loading
+12 −6
Original line number Diff line number Diff line
@@ -42212,6 +42212,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
  if (Mask.getScalarValueSizeInBits() != 1) {
    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
@@ -42479,6 +42480,7 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef,
                                     KnownZero, DCI)) {
    if (N->getOpcode() != ISD::DELETED_NODE)
      DCI.AddToWorklist(N);
    return SDValue(N, 0);
  }
@@ -43828,6 +43830,7 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
  unsigned BitWidth = N1.getValueSizeInBits();
  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
  if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) {
    if (N->getOpcode() != ISD::DELETED_NODE)
      DCI.AddToWorklist(N);
    return SDValue(N, 0);
  }
@@ -43846,6 +43849,7 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
    APInt DemandedElts = APInt::getLowBitsSet(8, 4);
    if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                       DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
@@ -44755,6 +44759,7 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
@@ -44847,6 +44852,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
+23 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skylake | FileCheck %s

@global = external global i32, align 4

; Regression test accompanying the fix for PR45067 (a deleted node being
; re-added to the DAG combine worklist).
;
; @foo performs a masked gather of eight i32 values through a constant pointer
; vector whose lanes all point at @global, using an all-true mask and an undef
; passthrough, and then writes the gathered vector to %x with a masked store
; whose mask is the incoming <8 x i1> %y. The function ends in `unreachable`,
; which is why the expected output finishes with `ud2`.
;
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define void @foo(<8 x i32>* %x, <8 x i1> %y) {
; CHECK-LABEL: foo:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpbroadcastq _global@{{.*}}(%rip), %ymm2
; CHECK-NEXT:    vpgatherqd %xmm1, (,%ymm2), %xmm3
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpslld $31, %ymm0, %ymm0
; CHECK-NEXT:    vinserti128 $1, %xmm3, %ymm3, %ymm1
; CHECK-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)
; CHECK-NEXT:    ud2
  %tmp = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> <i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global>, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %tmp, <8 x i32>* %x, i32 4, <8 x i1> %y)
  unreachable
}

; Declarations of the masked gather and masked store intrinsics called by @foo.
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)