Commit 3239b503 authored by Wang, Pengfei's avatar Wang, Pengfei
Browse files

[FPEnv] Add pragma FP_CONTRACT support under strict FP.

Summary: Support pragma FP_CONTRACT under strict FP.

Reviewers: craig.topper, andrew.w.kaylor, uweigand, RKSimon, LiuChen3

Subscribers: hiraditya, jdoerfert, cfe-commits, llvm-commits, LuoYuanke

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72820
parent 4c8817cd
Loading
Loading
Loading
Loading
+30 −6
Original line number Diff line number Diff line
@@ -3365,7 +3365,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
// the add operand respectively. This allows fmuladd to represent a*b-c, or
// c-a*b. Patterns in LLVM should catch the negated forms and translate them to
// efficient operations.
static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend,
                           const CodeGenFunction &CGF, CGBuilderTy &Builder,
                           bool negMul, bool negAdd) {
  assert(!(negMul && negAdd) && "Only one of negMul and negAdd should be set.");
@@ -3377,9 +3377,20 @@ static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
  if (negAdd)
    Addend = Builder.CreateFNeg(Addend, "neg");

  Value *FMulAdd = Builder.CreateCall(
  Value *FMulAdd = nullptr;
  if (Builder.getIsFPConstrained()) {
    assert(isa<llvm::ConstrainedFPIntrinsic>(MulOp) &&
           "Only constrained operation should be created when Builder is in FP "
           "constrained mode");
    FMulAdd = Builder.CreateConstrainedFPCall(
        CGF.CGM.getIntrinsic(llvm::Intrinsic::experimental_constrained_fmuladd,
                             Addend->getType()),
        {MulOp0, MulOp1, Addend});
  } else {
    FMulAdd = Builder.CreateCall(
        CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
        {MulOp0, MulOp1, Addend});
  }
  MulOp->eraseFromParent();

  return FMulAdd;
@@ -3417,6 +3428,19 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
      return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
  }

  if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(op.LHS)) {
    if (LHSBinOp->getIntrinsicID() ==
            llvm::Intrinsic::experimental_constrained_fmul &&
        LHSBinOp->use_empty())
      return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub);
  }
  if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(op.RHS)) {
    if (RHSBinOp->getIntrinsicID() ==
            llvm::Intrinsic::experimental_constrained_fmul &&
        RHSBinOp->use_empty())
      return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
  }

  return nullptr;
}

+12 −0
Original line number Diff line number Diff line
@@ -148,3 +148,15 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// CHECK: declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata)
};

// Turn floating-point contraction on for the code that follows, so that a
// multiply feeding an add or subtract is allowed to be emitted as one fmuladd.
#pragma STDC FP_CONTRACT ON
// Covers three multiply-add shapes, one per floating-point width (float,
// double, long double). The expectations at the end of the body require each
// expression to be emitted as a constrained fmuladd call of the matching type.
// NOTE(review): presumably this file is compiled in a strict/constrained FP
// mode by a RUN line that is outside this excerpt -- confirm.
void bar(float f) {
  // Plain a*b+c in float.
  f * f + f;
  // a*b-c in double; the subtraction is expected to show up as an fneg
  // followed by the f64 fmuladd.
  (double)f * f - f;
  // (-a)*b+c in long double; the unary minus is expected to show up as an
  // fneg ahead of the f80 fmuladd.
  (long double)-f * f + f;

// CHECK: call float @llvm.experimental.constrained.fmuladd.f32
// CHECK: fneg
// CHECK: call double @llvm.experimental.constrained.fmuladd.f64
// CHECK: fneg
// CHECK: call x86_fp80 @llvm.experimental.constrained.fmuladd.f80
};
+63 −0
Original line number Diff line number Diff line
@@ -16141,6 +16141,69 @@ if either operand is a SNAN. The signaling comparison operation
performed by '``llvm.experimental.constrained.fcmps``' will raise an
exception if either operand is a NAN (QNAN or SNAN).
'``llvm.experimental.constrained.fmuladd``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
      declare <type>
      @llvm.experimental.constrained.fmuladd(<type> <op1>, <type> <op2>,
                                             <type> <op3>,
                                             metadata <rounding mode>,
                                             metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fmuladd``' intrinsic represents
multiply-add expressions that can be fused if the code generator determines
that (a) the target instruction set has support for a fused operation,
and (b) that the fused operation is more efficient than the equivalent,
separate pair of mul and add instructions.
Arguments:
""""""""""
The first three arguments to the '``llvm.experimental.constrained.fmuladd``'
intrinsic must be floating-point or vector of floating-point values.
All three arguments must have identical types.
The fourth and fifth arguments specify the rounding mode and exception behavior
as described above.
Semantics:
""""""""""
The expression:
::
      %0 = call float @llvm.experimental.constrained.fmuladd.f32(%a, %b, %c,
                                                                 metadata <rounding mode>,
                                                                 metadata <exception behavior>)
is equivalent to the expression:
::
      %0 = call float @llvm.experimental.constrained.fmul.f32(%a, %b,
                                                              metadata <rounding mode>,
                                                              metadata <exception behavior>)
      %1 = call float @llvm.experimental.constrained.fadd.f32(%0, %c,
                                                              metadata <rounding mode>,
                                                              metadata <exception behavior>)
except that it is unspecified whether rounding will be performed between the
multiplication and addition steps. Fusion is not guaranteed, even if the target
platform supports it.
If a fused multiply-add is required, the corresponding
:ref:`llvm.experimental.constrained.fma <int_fma>` intrinsic function should be
used instead.
This never sets errno, just like '``llvm.experimental.constrained.fma.*``'.
Constrained libm-equivalent Intrinsics
--------------------------------------
+9 −0
Original line number Diff line number Diff line
@@ -1288,6 +1288,9 @@ public:
    case Intrinsic::fmuladd:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::experimental_constrained_fmuladd:
      ISDs.push_back(ISD::STRICT_FMA);
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
@@ -1511,6 +1514,12 @@ public:
    if (IID == Intrinsic::fmuladd)
      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
    if (IID == Intrinsic::experimental_constrained_fmuladd)
      return ConcreteTTI->getIntrinsicCost(
                 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
                 nullptr) +
             ConcreteTTI->getIntrinsicCost(
                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);

    // Else, assume that we need to scalarize this intrinsic. For math builtins
    // this will emit a costly libcall, adding call overhead and spills. Make it
+4 −0
Original line number Diff line number Diff line
@@ -95,6 +95,10 @@ DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN)
DAG_FUNCTION(sqrt,            1, 1, experimental_constrained_sqrt,       FSQRT)
DAG_FUNCTION(trunc,           1, 0, experimental_constrained_trunc,      FTRUNC)

// This is the definition of the fmuladd intrinsic function, which is converted
// into constrained FMA or FMUL + FADD intrinsics.
FUNCTION(fmuladd,         3, 1, experimental_constrained_fmuladd)

#undef INSTRUCTION
#undef FUNCTION
#undef CMP_INSTRUCTION
Loading