Commit 28b573d2 authored by Craig Topper's avatar Craig Topper
Browse files

[TargetLowering] Fix another potential FPE in expandFP_TO_UINT

D53794 introduced code to perform the FP_TO_UINT expansion via FP_TO_SINT in a way that would never expose floating-point exceptions in the intermediate steps. Unfortunately, I just noticed there is still a way this can happen. As discussed in D53794, the compiler now generates this sequence:

// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs
The problem is with the Src - 0x8000000000000000 expression. As I mentioned in the original review, that expression can never overflow or underflow if the original value is in range for FP_TO_UINT. But I missed that we can get an Inexact exception in the case where Src is a very small positive value. (In this case the result of the sub is ignored, but that doesn't help.)

Instead, I'd suggest to use the following sequence:

// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val - FltOfs) ^ IntOfs
In the case where the value is already in range of FP_TO_SINT, we now simply compute Val - 0, which now definitely cannot trap (unless Val is a NaN in which case we'd want to trap anyway).

In the case where the value is not in range of FP_TO_SINT, but still in range of FP_TO_UINT, the sub can never be inexact, as Val is between 2^(n-1) and (2^n)-1, i.e. always has the 2^(n-1) bit set, and the sub is always simply clearing that bit.

There is a slight complication in the case where Val is a constant, so we know at compile time whether Sel is true or false. In that scenario, the old code would automatically optimize the sub away, while this no longer happens with the new code. Instead, I've added extra code to check for this case and then just fall back to FP_TO_SINT directly. (This seems to catch even slightly more cases.)

Original version of the patch by Ulrich Weigand. X86 changes added by Craig Topper

Differential Revision: https://reviews.llvm.org/D67105
parent 040c39d5
Loading
Loading
Loading
Loading
+15 −15
Original line number Diff line number Diff line
@@ -6064,28 +6064,28 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // Val = select Sel, Src, Src - 0x8000000000000000
    // Ofs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Val) ^ Ofs
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue SrcBiased;
    if (Node->isStrictFPOpcode())
      SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, 
                              { Node->getOperand(0), Src, Cst });
    else
      SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
    SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, 
                                { Node->getOperand(0), Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, 
                         { SrcBiased.getValue(1), Val });
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
+11 −13
Original line number Diff line number Diff line
@@ -19047,8 +19047,9 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
    // of a signed i64.  Let Thresh be the FP equivalent of
    // 0x8000000000000000ULL.
    //
    //  Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
    //  FistSrc    = (Value < Thresh) ? Value : (Value - Thresh);
    //  Adjust = (Value < Thresh) ? 0 : 0x80000000;
    //  FltOfs = (Value < Thresh) ? 0 : 0x80000000;
    //  FistSrc = (Value - FltOfs);
    //  Fist-to-mem64 FistSrc
    //  Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
    //  to XOR'ing the high 32 bits with Adjust.
@@ -19056,7 +19057,6 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
    // Being a power of 2, Thresh is exactly representable in all FP formats.
    // For X87 we'd like to use the smallest FP type for this constant, but
    // for DAG type consistency we have to match the FP operand type.
    // FIXME: This code generates a spurious inexact exception for 1.0.
    APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
    LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
@@ -19082,18 +19082,16 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                           DAG.getConstant(0, DL, MVT::i64),
                           DAG.getConstant(APInt::getSignMask(64),
                                           DL, MVT::i64));
    SDValue Sub;
    SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp,
                                   DAG.getConstantFP(0.0, DL, TheVT),
                                   ThreshVal);
    if (IsStrict) {
      Sub = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
                        { Chain, Value, ThreshVal });
      Chain = Sub.getValue(1);
      Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
                          { Chain, Value, FltOfs });
      Chain = Value.getValue(1);
    } else
      Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
    Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
                                              *DAG.getContext(), TheVT),
                       Value, ThreshVal, ISD::SETLT);
    Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
      Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
  }
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
+21 −12
Original line number Diff line number Diff line
@@ -20,12 +20,15 @@ define i32 @f1(float %f) #0 {
; CHECK-NEXT:    larl %r1, .LCPI0_0
; CHECK-NEXT:    le %f1, 0(%r1)
; CHECK-NEXT:    cebr %f0, %f1
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    jl .LBB0_2
; CHECK-NEXT:    jnl .LBB0_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sebr %f0, %f1
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    lzer %f1
; CHECK-NEXT:    j .LBB0_3
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    sebr %f0, %f1
; CHECK-NEXT:    cfebr %r2, 5, %f0
; CHECK-NEXT:    xr %r2, %r0
; CHECK-NEXT:    br %r14
@@ -41,12 +44,15 @@ define i32 @f2(double %f) #0 {
; CHECK-NEXT:    larl %r1, .LCPI1_0
; CHECK-NEXT:    ldeb %f1, 0(%r1)
; CHECK-NEXT:    cdbr %f0, %f1
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    jl .LBB1_2
; CHECK-NEXT:    jnl .LBB1_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sdbr %f0, %f1
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    lzdr %f1
; CHECK-NEXT:    j .LBB1_3
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:  .LBB1_3:
; CHECK-NEXT:    sdbr %f0, %f1
; CHECK-NEXT:    cfdbr %r2, 5, %f0
; CHECK-NEXT:    xr %r2, %r0
; CHECK-NEXT:    br %r14
@@ -64,12 +70,15 @@ define i32 @f3(fp128 *%src) #0 {
; CHECK-NEXT:    larl %r1, .LCPI2_0
; CHECK-NEXT:    lxeb %f1, 0(%r1)
; CHECK-NEXT:    cxbr %f0, %f1
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    jl .LBB2_2
; CHECK-NEXT:    jnl .LBB2_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sxbr %f0, %f1
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    lzxr %f1
; CHECK-NEXT:    j .LBB2_3
; CHECK-NEXT:  .LBB2_2:
; CHECK-NEXT:    llilh %r0, 32768
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    sxbr %f0, %f1
; CHECK-NEXT:    cfxbr %r2, 5, %f0
; CHECK-NEXT:    xr %r2, %r0
; CHECK-NEXT:    br %r14
+21 −12
Original line number Diff line number Diff line
@@ -19,12 +19,15 @@ define i64 @f1(float %f) #0 {
; CHECK-NEXT:    larl %r1, .LCPI0_0
; CHECK-NEXT:    le %f1, 0(%r1)
; CHECK-NEXT:    cebr %f0, %f1
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    jl .LBB0_2
; CHECK-NEXT:    jnl .LBB0_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sebr %f0, %f1
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    lzer %f1
; CHECK-NEXT:    j .LBB0_3
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:  .LBB0_3:
; CHECK-NEXT:    sebr %f0, %f1
; CHECK-NEXT:    cgebr %r2, 5, %f0
; CHECK-NEXT:    xgr %r2, %r0
; CHECK-NEXT:    br %r14
@@ -40,12 +43,15 @@ define i64 @f2(double %f) #0 {
; CHECK-NEXT:    larl %r1, .LCPI1_0
; CHECK-NEXT:    ldeb %f1, 0(%r1)
; CHECK-NEXT:    cdbr %f0, %f1
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    jl .LBB1_2
; CHECK-NEXT:    jnl .LBB1_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sdbr %f0, %f1
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    lzdr %f1
; CHECK-NEXT:    j .LBB1_3
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:  .LBB1_3:
; CHECK-NEXT:    sdbr %f0, %f1
; CHECK-NEXT:    cgdbr %r2, 5, %f0
; CHECK-NEXT:    xgr %r2, %r0
; CHECK-NEXT:    br %r14
@@ -63,12 +69,15 @@ define i64 @f3(fp128 *%src) #0 {
; CHECK-NEXT:    larl %r1, .LCPI2_0
; CHECK-NEXT:    lxeb %f1, 0(%r1)
; CHECK-NEXT:    cxbr %f0, %f1
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    jl .LBB2_2
; CHECK-NEXT:    jnl .LBB2_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sxbr %f0, %f1
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:    lghi %r0, 0
; CHECK-NEXT:    lzxr %f1
; CHECK-NEXT:    j .LBB2_3
; CHECK-NEXT:  .LBB2_2:
; CHECK-NEXT:    llihh %r0, 32768
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    sxbr %f0, %f1
; CHECK-NEXT:    cgxbr %r2, 5, %f0
; CHECK-NEXT:    xgr %r2, %r0
; CHECK-NEXT:    br %r14
+32 −30
Original line number Diff line number Diff line
@@ -444,20 +444,21 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    fldt 8(%ebp)
; X86-NEXT:    flds {{\.LCPI.*}}
; X86-NEXT:    fld %st(1)
; X86-NEXT:    fsub %st(1), %st
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fucomp %st(2)
; X86-NEXT:    fucom %st(1)
; X86-NEXT:    fnstsw %ax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    # kill: def $ah killed $ah killed $ax
; X86-NEXT:    sahf
; X86-NEXT:    setbe %al
; X86-NEXT:    fldz
; X86-NEXT:    ja .LBB10_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    fstp %st(0)
; X86-NEXT:    fldz
; X86-NEXT:    fxch %st(1)
; X86-NEXT:  .LBB10_2:
; X86-NEXT:    fstp %st(0)
; X86-NEXT:    setbe %al
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    fsubrp %st, %st(1)
; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl $3072, %ecx # imm = 0xC00
@@ -465,7 +466,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movb %al, %dl
; X86-NEXT:    shll $31, %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -477,14 +478,14 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X64-X87:       # %bb.0:
; X64-X87-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT:    flds {{.*}}(%rip)
; X64-X87-NEXT:    fld %st(1)
; X64-X87-NEXT:    fsub %st(1), %st
; X64-X87-NEXT:    xorl %eax, %eax
; X64-X87-NEXT:    fucomi %st(1), %st
; X64-X87-NEXT:    setbe %al
; X64-X87-NEXT:    fldz
; X64-X87-NEXT:    fxch %st(1)
; X64-X87-NEXT:    fucompi %st(2), %st
; X64-X87-NEXT:    fcmovnbe %st(1), %st
; X64-X87-NEXT:    fstp %st(1)
; X64-X87-NEXT:    setbe %al
; X64-X87-NEXT:    fsubrp %st, %st(1)
; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT:    orl $3072, %ecx # imm = 0xC00
@@ -500,13 +501,13 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X64-SSSE3:       # %bb.0:
; X64-SSSE3-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
; X64-SSSE3-NEXT:    fld %st(1)
; X64-SSSE3-NEXT:    fsub %st(1), %st
; X64-SSSE3-NEXT:    xorl %eax, %eax
; X64-SSSE3-NEXT:    fucomi %st(1), %st
; X64-SSSE3-NEXT:    fldz
; X64-SSSE3-NEXT:    fxch %st(1)
; X64-SSSE3-NEXT:    fucompi %st(2), %st
; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
; X64-SSSE3-NEXT:    fstp %st(1)
; X64-SSSE3-NEXT:    fsubrp %st, %st(1)
; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT:    setbe %al
; X64-SSSE3-NEXT:    shlq $63, %rax
@@ -526,20 +527,21 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    fldt (%eax)
; X86-NEXT:    flds {{\.LCPI.*}}
; X86-NEXT:    fld %st(1)
; X86-NEXT:    fsub %st(1), %st
; X86-NEXT:    fxch %st(1)
; X86-NEXT:    fucomp %st(2)
; X86-NEXT:    fucom %st(1)
; X86-NEXT:    fnstsw %ax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    # kill: def $ah killed $ah killed $ax
; X86-NEXT:    sahf
; X86-NEXT:    setbe %al
; X86-NEXT:    fldz
; X86-NEXT:    ja .LBB11_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    fstp %st(0)
; X86-NEXT:    fldz
; X86-NEXT:    fxch %st(1)
; X86-NEXT:  .LBB11_2:
; X86-NEXT:    fstp %st(0)
; X86-NEXT:    setbe %al
; X86-NEXT:    fstp %st(1)
; X86-NEXT:    fsubrp %st, %st(1)
; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl $3072, %ecx # imm = 0xC00
@@ -547,7 +549,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movb %al, %dl
; X86-NEXT:    shll $31, %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -559,14 +561,14 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X64-X87:       # %bb.0:
; X64-X87-NEXT:    fldt (%rdi)
; X64-X87-NEXT:    flds {{.*}}(%rip)
; X64-X87-NEXT:    fld %st(1)
; X64-X87-NEXT:    fsub %st(1), %st
; X64-X87-NEXT:    xorl %eax, %eax
; X64-X87-NEXT:    fucomi %st(1), %st
; X64-X87-NEXT:    setbe %al
; X64-X87-NEXT:    fldz
; X64-X87-NEXT:    fxch %st(1)
; X64-X87-NEXT:    fucompi %st(2), %st
; X64-X87-NEXT:    fcmovnbe %st(1), %st
; X64-X87-NEXT:    fstp %st(1)
; X64-X87-NEXT:    setbe %al
; X64-X87-NEXT:    fsubrp %st, %st(1)
; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT:    orl $3072, %ecx # imm = 0xC00
@@ -582,13 +584,13 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X64-SSSE3:       # %bb.0:
; X64-SSSE3-NEXT:    fldt (%rdi)
; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
; X64-SSSE3-NEXT:    fld %st(1)
; X64-SSSE3-NEXT:    fsub %st(1), %st
; X64-SSSE3-NEXT:    xorl %eax, %eax
; X64-SSSE3-NEXT:    fucomi %st(1), %st
; X64-SSSE3-NEXT:    fldz
; X64-SSSE3-NEXT:    fxch %st(1)
; X64-SSSE3-NEXT:    fucompi %st(2), %st
; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
; X64-SSSE3-NEXT:    fstp %st(1)
; X64-SSSE3-NEXT:    fsubrp %st, %st(1)
; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT:    setbe %al
; X64-SSSE3-NEXT:    shlq $63, %rax
Loading