Commit 5fc03679 authored by Hans Wennborg
Browse files

Merging r371088 and r371095:

------------------------------------------------------------------------
r371088 | spatel | 2019-09-05 18:58:18 +0200 (Thu, 05 Sep 2019) | 1 line

[x86] add test for horizontal math bug (PR43225); NFC
------------------------------------------------------------------------

------------------------------------------------------------------------
r371095 | spatel | 2019-09-05 19:28:17 +0200 (Thu, 05 Sep 2019) | 3 lines

[x86] fix horizontal math bug exposed by improved demanded elements analysis (PR43225)

https://bugs.llvm.org/show_bug.cgi?id=43225
------------------------------------------------------------------------

llvm-svn: 371178
parent c2551012
Loading
Loading
Loading
Loading
+24 −5
Original line number Diff line number Diff line
@@ -33594,7 +33594,7 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
}
/// Eliminate a redundant shuffle of a horizontal math op.
static SDValue foldShuffleOfHorizOp(SDNode *N) {
static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
    if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
@@ -33625,6 +33625,25 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
      HOp.getOperand(0) != HOp.getOperand(1))
    return SDValue();
  // The shuffle that we are eliminating may have allowed the horizontal op to
  // have an undemanded (undefined) operand. Duplicate the other (defined)
  // operand to ensure that the results are defined across all lanes without the
  // shuffle.
  auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
    SDValue X;
    if (HorizOp.getOperand(0).isUndef()) {
      assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
      X = HorizOp.getOperand(1);
    } else if (HorizOp.getOperand(1).isUndef()) {
      assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
      X = HorizOp.getOperand(0);
    } else {
      return HorizOp;
    }
    return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
                       HorizOp.getValueType(), X, X);
  };
  // When the operands of a horizontal math op are identical, the low half of
  // the result is the same as the high half. If a target shuffle is also
  // replicating low and high halves, we don't need the shuffle.
@@ -33635,7 +33654,7 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
      assert((HOp.getValueType() == MVT::v2f64 ||
        HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
        "Unexpected type for h-op");
      return HOp;
      return updateHOp(HOp, DAG);
    }
    return SDValue();
  }
@@ -33649,14 +33668,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
      (isTargetShuffleEquivalent(Mask, {0, 0}) ||
       isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
       isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
    return HOp;
    return updateHOp(HOp, DAG);
  if (HOp.getValueSizeInBits() == 256 &&
      (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
       isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
       isTargetShuffleEquivalent(
           Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
    return HOp;
    return updateHOp(HOp, DAG);
  return SDValue();
}
@@ -33710,7 +33729,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
    if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
      return AddSub;
    if (SDValue HAddSub = foldShuffleOfHorizOp(N))
    if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
      return HAddSub;
  }
+26 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx  | FileCheck %s

; Eliminating a shuffle means we have to replace an undef operand of a horizontal op.

define void @PR43225(<4 x double>* %p0, <4 x double>* %p1, <4 x double> %x, <4 x double> %y, <4 x double> %z) nounwind {
; CHECK-LABEL: PR43225:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0
; CHECK-NEXT:    vmovaps (%rsi), %ymm0
; CHECK-NEXT:    vhsubpd %ymm2, %ymm2, %ymm0
; CHECK-NEXT:    vmovapd %ymm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  ; The two loads below are volatile. The expected assembly still contains both
  ; vmovaps instructions even though ymm0 is overwritten by the vhsubpd that
  ; follows them.
  %t39 = load volatile <4 x double>, <4 x double>* %p0, align 32
  %shuffle11 = shufflevector <4 x double> %t39, <4 x double> %x, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %t40 = load volatile <4 x double>, <4 x double>* %p1, align 32
  ; %t41 is used only as the second operand of the hsub.
  %t41 = tail call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %shuffle11, <4 x double> %t40)
  %t42 = tail call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %z, <4 x double> %t41)
  ; The mask <0, 0, 2, 2> reads only elements 0 and 2 of %t42. In the expected
  ; output there is no separate shuffle instruction and the vhsubpd takes the
  ; same register (%ymm2, presumably %z given the argument order) for both
  ; source operands.
  %shuffle12 = shufflevector <4 x double> %t42, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  store volatile <4 x double> %shuffle12, <4 x double>* %p0, align 32
  ret void
}

declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)