Commit c43b8ec7 authored by Craig Topper

[X86] Add support for STRICT_FP_ROUND/STRICT_FP_EXTEND from/to fp128 to/from f32/f64/f80 in 64-bit mode.

These nodes need to emit a libcall, as we do for the non-strict versions.

32-bit mode needs SoftenFloat support to be implemented for strict FP nodes.

Differential Revision: https://reviews.llvm.org/D70504
parent 01d8e09f
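
The strict nodes differ from their non-strict counterparts in two ways: they carry an input chain as operand 0 (so the FP value sits at operand 1), and they produce an output chain as a second result. Below is a minimal sketch of the chain-threading pattern the patch applies, assuming the TargetLowering::makeLibCall API of this LLVM era; lowerStrictF128Libcall is an illustrative name, not a function from the patch:

// Illustrative sketch, not the patch's own code: shows how a strict FP
// node's chain is fed into makeLibCall and merged back into the results.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue lowerStrictF128Libcall(SDValue Op, SelectionDAG &DAG,
                                      const TargetLowering &TLI,
                                      RTLIB::Libcall LC) {
  bool IsStrict = Op->isStrictFPOpcode();
  SDLoc dl(Op);
  // Strict FP nodes carry the chain as operand 0; the value follows it.
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
  TargetLowering::MakeLibCallOptions CallOptions;
  // Passing a chain makes makeLibCall thread it through the call, keeping
  // the libcall ordered with other side-effecting FP operations.
  std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
      DAG, LC, Op.getValueType(), In, CallOptions, dl, Chain);
  if (IsStrict)
    // Strict nodes have two results: the value and the output chain.
    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
  return Tmp.first;
}

This is the same shape LowerFP_ROUND and LowerF128Call take in the diff below.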
llvm/lib/Target/X86/X86ISelLowering.cpp  +37 −17
@@ -691,6 +691,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
 
     setOperationAction(ISD::FP_EXTEND,        MVT::f128, Custom);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
 
     // We need to custom handle any FP_ROUND with an f128 input, but
     // LegalizeDAG uses the result type to know when to run a custom handler.
     // So we have to list all legal floating point result types here.
@@ -19714,9 +19715,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  bool IsStrict = Op->isStrictFPOpcode();
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
-  SDValue In = Op.getOperand(0);
+  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
   if (VT == MVT::f128) {
@@ -19725,6 +19728,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   }
 
   assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
 
+  // FIXME: Strict fp.
+  assert(!IsStrict && "Strict FP not supported yet!");
   return DAG.getNode(X86ISD::VFPEXT, DL, VT,
                      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
@@ -19732,8 +19737,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+  bool IsStrict = Op->isStrictFPOpcode();
   MVT VT = Op.getSimpleValueType();
-  SDValue In = Op.getOperand(0);
+  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
   // It's legal except when f128 is involved
@@ -19745,17 +19752,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   // FP_ROUND node has a second operand indicating whether it is known to be
   // precise. That doesn't take part in the LibCall so we can't directly use
   // LowerF128Call.
+  SDLoc dl(Op);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
-}
+  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
+                                                dl, Chain);
 
-// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
-// the default expansion of STRICT_FP_ROUND.
-static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
-  // FIXME: Need to form a libcall with an input chain for f128.
-  assert(Op.getOperand(0).getValueType() != MVT::f128 &&
-         "Don't know how to handle f128 yet!");
-  return Op;
+  if (IsStrict)
+    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+  return Tmp.first;
 }
 
 /// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -27773,9 +27780,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
 SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                          RTLIB::Libcall Call) const {
-  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
+  SDLoc dl(Op);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
+                                                CallOptions, dl, Chain);
+
+  if (IsStrict)
+    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+  return Tmp.first;
 }
 
 /// Provide custom lowering hooks for some operations.
@@ -27825,9 +27844,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::STRICT_FP_TO_UINT:  return LowerFP_TO_INT(Op, DAG);
-  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
-  case ISD::FP_ROUND:           return LowerFP_ROUND(Op, DAG);
-  case ISD::STRICT_FP_ROUND:    return LowerSTRICT_FP_ROUND(Op, DAG);
+  case ISD::FP_EXTEND:
+  case ISD::STRICT_FP_EXTEND:   return LowerFP_EXTEND(Op, DAG);
+  case ISD::FP_ROUND:
+  case ISD::STRICT_FP_ROUND:    return LowerFP_ROUND(Op, DAG);
   case ISD::LOAD:               return LowerLoad(Op, Subtarget, DAG);
   case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
   case ISD::FADD:
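
The conversions handled above resolve to the standard fp128 soft-float routines, whose names appear in the CHECK lines of the new test below. For orientation only, here are their C-level prototypes as conventionally provided by compiler-rt/libgcc on x86-64, where __float128 is fp128 and long double is x86_fp80; they are not part of this patch:

// Hypothetical declarations of the runtime conversion routines the
// lowered nodes call into; taken from convention, not from this commit.
extern "C" {
__float128  __extendsftf2(float a);        // f32  -> f128
__float128  __extenddftf2(double a);       // f64  -> f128
__float128  __extendxftf2(long double a);  // f80  -> f128
float       __trunctfsf2(__float128 a);    // f128 -> f32
double      __trunctfdf2(__float128 a);    // f128 -> f64
long double __trunctfxf2(__float128 a);    // f128 -> f80
}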
New test file  +175 −0
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX

; Check soft floating point conversion function calls.

@vf32 = common global float 0.000000e+00, align 4
@vf64 = common global double 0.000000e+00, align 8
@vf80 = common global x86_fp80 0xK00000000000000000000, align 8
@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16

define void @TestFPExtF32_F128() nounwind strictfp {
; X64-SSE-LABEL: TestFPExtF32_F128:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    pushq %rax
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    callq __extendsftf2
; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    popq %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPExtF32_F128:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    pushq %rax
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    callq __extendsftf2
; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    popq %rax
; X64-AVX-NEXT:    retq
entry:
  %0 = load float, float* @vf32, align 4
  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0
  store fp128 %conv, fp128* @vf128, align 16
  ret void
}

define void @TestFPExtF64_F128() nounwind strictfp {
; X64-SSE-LABEL: TestFPExtF64_F128:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    pushq %rax
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    callq __extenddftf2
; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    popq %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPExtF64_F128:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    pushq %rax
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    callq __extenddftf2
; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    popq %rax
; X64-AVX-NEXT:    retq
entry:
  %0 = load double, double* @vf64, align 8
  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0
  store fp128 %conv, fp128* @vf128, align 16
  ret void
}

define void @TestFPExtF80_F128() nounwind strictfp {
; X64-SSE-LABEL: TestFPExtF80_F128:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    fldt {{.*}}(%rip)
; X64-SSE-NEXT:    fstpt (%rsp)
; X64-SSE-NEXT:    callq __extendxftf2
; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPExtF80_F128:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    fldt {{.*}}(%rip)
; X64-AVX-NEXT:    fstpt (%rsp)
; X64-AVX-NEXT:    callq __extendxftf2
; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    retq
entry:
  %0 = load x86_fp80, x86_fp80* @vf80, align 8
  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0
  store fp128 %conv, fp128* @vf128, align 16
  ret void
}

define void @TestFPTruncF128_F32() nounwind strictfp {
; X64-SSE-LABEL: TestFPTruncF128_F32:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    pushq %rax
; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    callq __trunctfsf2
; X64-SSE-NEXT:    movss %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    popq %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPTruncF128_F32:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    pushq %rax
; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    callq __trunctfsf2
; X64-AVX-NEXT:    vmovss %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    popq %rax
; X64-AVX-NEXT:    retq
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store float %conv, float* @vf32, align 4
  ret void
}

define void @TestFPTruncF128_F64() nounwind strictfp {
; X64-SSE-LABEL: TestFPTruncF128_F64:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    pushq %rax
; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    callq __trunctfdf2
; X64-SSE-NEXT:    movsd %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    popq %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPTruncF128_F64:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    pushq %rax
; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    callq __trunctfdf2
; X64-AVX-NEXT:    vmovsd %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    popq %rax
; X64-AVX-NEXT:    retq
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store double %conv, double* @vf64, align 8
  ret void
}

define void @TestFPTruncF128_F80() nounwind strictfp {
; X64-SSE-LABEL: TestFPTruncF128_F80:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    pushq %rax
; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    callq __trunctfxf2
; X64-SSE-NEXT:    fstpt {{.*}}(%rip)
; X64-SSE-NEXT:    popq %rax
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: TestFPTruncF128_F80:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    pushq %rax
; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    callq __trunctfxf2
; X64-AVX-NEXT:    fstpt {{.*}}(%rip)
; X64-AVX-NEXT:    popq %rax
; X64-AVX-NEXT:    retq
entry:
  %0 = load fp128, fp128* @vf128, align 16
  %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  store x86_fp80 %conv, x86_fp80* @vf80, align 8
  ret void
}

attributes #0 = { strictfp }

declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata)