Commit 40163f1d authored by Sanjay Patel
Browse files

[x86] add special-case lowering for usubsat for AVX512

This is a small extension of D112095 to avoid another regression
seen with D112085.
In this case, we allow the same conversion from usubsat to ALU
ops (xor + sra + and) if the target supports vpternlog, even when
UMAX is legal for the type.

That pattern will get converted later in X86DAGToDAGISel::tryVPTERNLOG().
This seems better than putting a magic immediate constant directly in
this code to create the exact vpternlog that we need. It's possible that
there are other special cases along these lines, so we should try to
keep all of the vpternlog magic in one place.

Differential Revision: https://reviews.llvm.org/D112138
parent 048688fd
Loading
Loading
Loading
Loading
+26 −20
Original line number Diff line number Diff line
@@ -28139,9 +28139,13 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
  if (Opcode == ISD::USUBSAT) {
    if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
      // Handle a special-case with a bit-hack instead of cmp+select:
      // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
      // If the target can use VPTERNLOG, DAGToDAG will match this as
      // "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
      // "broadcast" constant load.
      ConstantSDNode *C = isConstOrConstSplat(Y, true);
      if (C && C->getAPIntValue().isSignMask()) {
        SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
@@ -28150,7 +28154,8 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
        SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
        return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
      }
    }
    if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
      // usubsat X, Y --> (X >u Y) ? X - Y : 0
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
      SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
@@ -28160,6 +28165,7 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
        return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
      return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
    }
  }
  if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
      (!VT.isVector() || VT == MVT::v2i64)) {
+2 −3
Original line number Diff line number Diff line
@@ -162,9 +162,8 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
;
; AVX512-LABEL: usubsat_custom:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX512-NEXT:    vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
  ret <4 x i32> %res