Commit cdfccb83 authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r310552:

------------------------------------------------------------------------
r310552 | eladcohen | 2017-08-10 00:44:23 -0700 (Thu, 10 Aug 2017) | 19 lines

[SelectionDAG] When scalarizing vselect, don't assert on
a legal cond operand.

When scalarizing the result of a vselect, the legalizer currently expects
to already have scalarized the operands. While this is true for the true/false
operands (which have the same type as the result), it is not the case for the
condition operand. On X86 AVX512, v1i1 is legal - this leads to operations such
as '< N x type> vselect < N x i1> < N x type> < N x type>' where < N x type > is
illegal to hit an assertion during the scalarization.

The handling is similar to r205625.
This also exposes the fact that (v1i1 extract_subvector) should be legal
and selectable on AVX512 - We do this by custom lowering to vector_extract_elt.
This still leaves us in some cases with redundant dag nodes which will be
combined in a separate soon to come patch.

This fixes pr33349.

Differential revision: https://reviews.llvm.org/D36511
------------------------------------------------------------------------

llvm-svn: 310635
parent 1d6f2b30
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
@@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
  SDValue Cond = GetScalarizedVector(N->getOperand(0));
  SDValue Cond = N->getOperand(0);
  EVT OpVT = Cond.getValueType();
  SDLoc DL(N);
  // The vselect result and true/false operands need scalarizing, but it's
  // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
  // See the similar logic in ScalarizeVecRes_VSETCC.
  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
    Cond = GetScalarizedVector(Cond);
  } else {
    EVT VT = OpVT.getVectorElementType();
    Cond = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  SDValue LHS = GetScalarizedVector(N->getOperand(1));
  TargetLowering::BooleanContent ScalarBool =
      TLI.getBooleanContents(false, false);
+16 −1
Original line number Diff line number Diff line
@@ -1383,7 +1383,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    // (result) is 256-bit but the source is 512-bit wide.
    // 128-bit was made Custom under AVX1.
    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
                     MVT::v8f32, MVT::v4f64 })
                     MVT::v8f32, MVT::v4f64, MVT::v1i1 })
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
                     MVT::v16i1, MVT::v32i1, MVT::v64i1 })
@@ -14570,6 +14570,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
  MVT ResVT = Op.getSimpleValueType();
  // When v1i1 is legal, scalarizing a vselect with a vXi1 Cond
  // results in: v1i1 = extract_subvector(vXi1, idx).
  // Lower these into extract_vector_elt, which is already selectable.
  if (ResVT == MVT::v1i1) {
    assert(Subtarget.hasAVX512() &&
           "Boolean EXTRACT_SUBVECTOR requires AVX512");
    MVT EltVT = ResVT.getVectorElementType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT LegalVT =
        (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
    SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
  }
  assert((In.getSimpleValueType().is256BitVector() ||
          In.getSimpleValueType().is512BitVector()) &&
         "Can only extract from 256-bit or 512-bit vectors");
+92 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

 ; Regression test for PR33349: a vector select whose result type
 ; (<4 x x86_fp80>) has to be scalarized while its <4 x i1> condition does not
 ; necessarily go through the same scalarization. The KNL and SKX check lines
 ; below pin the per-lane "extract condition bit, test, fcmovne" sequence that
 ; llc emits for the two AVX512 configurations in the RUN lines.
 define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
; KNL-LABEL: test:
; KNL:       # BB#0: # %bb
; KNL-NEXT:    vpextrb $0, %xmm0, %eax
; KNL-NEXT:    testb $1, %al
; KNL-NEXT:    fld1
; KNL-NEXT:    fldz
; KNL-NEXT:    fld %st(0)
; KNL-NEXT:    fcmovne %st(2), %st(0)
; KNL-NEXT:    vpextrb $4, %xmm0, %eax
; KNL-NEXT:    testb $1, %al
; KNL-NEXT:    fld %st(1)
; KNL-NEXT:    fcmovne %st(3), %st(0)
; KNL-NEXT:    vpextrb $8, %xmm0, %eax
; KNL-NEXT:    testb $1, %al
; KNL-NEXT:    fld %st(2)
; KNL-NEXT:    fcmovne %st(4), %st(0)
; KNL-NEXT:    vpextrb $12, %xmm0, %eax
; KNL-NEXT:    testb $1, %al
; KNL-NEXT:    fxch %st(3)
; KNL-NEXT:    fcmovne %st(4), %st(0)
; KNL-NEXT:    fstp %st(4)
; KNL-NEXT:    fxch %st(3)
; KNL-NEXT:    fstpt 30(%rdi)
; KNL-NEXT:    fxch %st(1)
; KNL-NEXT:    fstpt 20(%rdi)
; KNL-NEXT:    fxch %st(1)
; KNL-NEXT:    fstpt 10(%rdi)
; KNL-NEXT:    fstpt (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test:
; SKX:       # BB#0: # %bb
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    kshiftrw $2, %k0, %k1
; SKX-NEXT:    kshiftlw $15, %k1, %k2
; SKX-NEXT:    kshiftrw $15, %k2, %k2
; SKX-NEXT:    kshiftlw $15, %k2, %k2
; SKX-NEXT:    kshiftrw $15, %k2, %k2
; SKX-NEXT:    kmovd %k2, %eax
; SKX-NEXT:    testb $1, %al
; SKX-NEXT:    fld1
; SKX-NEXT:    fldz
; SKX-NEXT:    fld %st(0)
; SKX-NEXT:    fcmovne %st(2), %st(0)
; SKX-NEXT:    kshiftlw $14, %k1, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlw $15, %k1, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kmovd %k1, %eax
; SKX-NEXT:    testb $1, %al
; SKX-NEXT:    fld %st(1)
; SKX-NEXT:    fcmovne %st(3), %st(0)
; SKX-NEXT:    kshiftlw $15, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlw $15, %k1, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kmovd %k1, %eax
; SKX-NEXT:    testb $1, %al
; SKX-NEXT:    fld %st(2)
; SKX-NEXT:    fcmovne %st(4), %st(0)
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kshiftlw $15, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    testb $1, %al
; SKX-NEXT:    fxch %st(3)
; SKX-NEXT:    fcmovne %st(4), %st(0)
; SKX-NEXT:    fstp %st(4)
; SKX-NEXT:    fxch %st(3)
; SKX-NEXT:    fstpt 10(%rdi)
; SKX-NEXT:    fxch %st(1)
; SKX-NEXT:    fstpt (%rdi)
; SKX-NEXT:    fxch %st(1)
; SKX-NEXT:    fstpt 30(%rdi)
; SKX-NEXT:    fstpt 20(%rdi)
; SKX-NEXT:    retq
 bb:
   ; Per-lane select between a splat x86_fp80 constant and zero (the checks
   ; above materialize the two values with fld1 / fldz). %v is unused.
   %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80             0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
   ; Store all four selected lanes through %p.
   store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
   ret void
 }