Commit 1a9fbf61 authored by Simon Pilgrim
Browse files

[X86] combineLoad - reuse an existing VBROADCAST_LOAD constant for a smaller...

[X86] combineLoad - reuse an existing VBROADCAST_LOAD constant for a smaller vector load of the same constant

Extends the existing code that performed something similar for SUBV_BROADCAST_LOAD, but this is just for cases where AVX2 targets load full-width 128-bit constant vectors but broadcast the equivalent 256-bit constant vector.

Fixes AVX2 case for Issue #70947
parent 9e618e5e
Loading
Loading
Loading
Loading
+33 −11
Original line number Diff line number Diff line
@@ -49785,26 +49785,48 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
    }
  }
  // If we also broadcast this as a subvector to a wider type, then just extract
  // the lowest subvector.
  // If we also broadcast this to a wider type, then just extract the lowest
  // subvector.
  if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
      (RegVT.is128BitVector() || RegVT.is256BitVector())) {
    SDValue Ptr = Ld->getBasePtr();
    SDValue Chain = Ld->getChain();
    for (SDNode *User : Ptr->uses()) {
      if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
          cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
    for (SDNode *User : Chain->uses()) {
      if (User != N &&
          (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
           User->getOpcode() == X86ISD::VBROADCAST_LOAD) &&
          cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
          cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
              MemVT.getSizeInBits() &&
          !User->hasAnyUseOfValue(1) &&
          User->getValueSizeInBits(0).getFixedValue() >
              RegVT.getFixedSizeInBits()) {
        if (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
            cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
            cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
                MemVT.getSizeInBits()) {
          SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                             RegVT.getSizeInBits());
          Extract = DAG.getBitcast(RegVT, Extract);
          return DCI.CombineTo(N, Extract, SDValue(User, 1));
        }
        if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
            getTargetConstantFromBasePtr(Ptr)) {
          // See if we are loading a constant that has also been broadcast.
          APInt Undefs, UserUndefs;
          SmallVector<APInt> Bits, UserBits;
          if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
              getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
                                            UserBits)) {
            UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
            UserBits.truncate(Bits.size());
            if (Bits == UserBits && UserUndefs.isSubsetOf(Undefs)) {
              SDValue Extract = extractSubVector(
                  SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
              Extract = DAG.getBitcast(RegVT, Extract);
              return DCI.CombineTo(N, Extract, SDValue(User, 1));
            }
          }
        }
      }
    }
  }
+6 −8
Original line number Diff line number Diff line
@@ -332,10 +332,9 @@ define void @PR70947(ptr %src, ptr %dst) {
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT:    vmovups 32(%ecx), %xmm0
; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps (%ecx), %ymm1, %ymm1
; X86-AVX2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps (%ecx), %ymm0, %ymm1
; X86-AVX2-NEXT:    vandps 32(%ecx), %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovups %ymm1, (%eax)
; X86-AVX2-NEXT:    vmovups %xmm0, 16(%eax)
; X86-AVX2-NEXT:    vzeroupper
@@ -378,10 +377,9 @@ define void @PR70947(ptr %src, ptr %dst) {
;
; X64-AVX2-LABEL: PR70947:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovups 32(%rdi), %xmm0
; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps (%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps (%rdi), %ymm0, %ymm1
; X64-AVX2-NEXT:    vandps 32(%rdi), %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovups %ymm1, (%rsi)
; X64-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
; X64-AVX2-NEXT:    vzeroupper