Commit 3239b503 authored by Wang, Pengfei

[FPEnv] Add pragma FP_CONTRACT support under strict FP.

Summary: Support pragma FP_CONTRACT under strict FP.

Reviewers: craig.topper, andrew.w.kaylor, uweigand, RKSimon, LiuChen3

Subscribers: hiraditya, jdoerfert, cfe-commits, llvm-commits, LuoYuanke

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72820
parent 4c8817cd
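
For context, here is a minimal source-level sketch of what this change enables. It is not part of the commit message; the driver flag and the expected IR are assumptions based on the clang test updated below.

// contract.cpp -- compile in a strict-FP mode, e.g.:
//   clang -c -ffp-exception-behavior=strict contract.cpp
// Previously, contraction was not performed under strict FP; with this
// change, a*b+c below contracts into the constrained fmuladd intrinsic.
#pragma STDC FP_CONTRACT ON

float contract(float a, float b, float c) {
  // Expected to lower to @llvm.experimental.constrained.fmuladd.f32
  // rather than separate constrained fmul/fadd calls.
  return a * b + c;
}
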
@@ -3365,7 +3365,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
// the add operand respectively. This allows fmuladd to represent a*b-c, or
// c-a*b. Patterns in LLVM should catch the negated forms and translate them to
// efficient operations.
static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend,
const CodeGenFunction &CGF, CGBuilderTy &Builder,
bool negMul, bool negAdd) {
assert(!(negMul && negAdd) && "Only one of negMul and negAdd should be set.");
@@ -3377,12 +3377,23 @@ static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend,
if (negAdd)
Addend = Builder.CreateFNeg(Addend, "neg");
Value *FMulAdd = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
{MulOp0, MulOp1, Addend});
MulOp->eraseFromParent();
Value *FMulAdd = nullptr;
if (Builder.getIsFPConstrained()) {
assert(isa<llvm::ConstrainedFPIntrinsic>(MulOp) &&
"Only constrained operation should be created when Builder is in FP "
"constrained mode");
FMulAdd = Builder.CreateConstrainedFPCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::experimental_constrained_fmuladd,
Addend->getType()),
{MulOp0, MulOp1, Addend});
} else {
FMulAdd = Builder.CreateCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
{MulOp0, MulOp1, Addend});
}
MulOp->eraseFromParent();
return FMulAdd;
}
// Check whether it would be legal to emit an fmuladd intrinsic call to
@@ -3417,6 +3428,19 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
}
if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(op.LHS)) {
if (LHSBinOp->getIntrinsicID() ==
llvm::Intrinsic::experimental_constrained_fmul &&
LHSBinOp->use_empty())
return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub);
}
if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(op.RHS)) {
if (RHSBinOp->getIntrinsicID() ==
llvm::Intrinsic::experimental_constrained_fmul &&
RHSBinOp->use_empty())
return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false);
}
return nullptr;
}
......
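
The CodeGen change above keys off the builder's constrained-FP mode: under strict FP the multiply is an experimental_constrained_fmul call rather than a BinaryOperator, so both the matcher and buildFMulAdd must handle CallBase. A minimal stand-alone sketch of the same dispatch (the helper name is hypothetical; the IRBuilder APIs are the ones the commit itself uses):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical helper mirroring buildFMulAdd's dispatch. In constrained mode,
// CreateConstrainedFPCall appends the current rounding-mode and
// exception-behavior metadata operands to the call for us.
static Value *emitFMulAddLike(IRBuilder<> &B, Module &M, Value *A, Value *X,
                              Value *C) {
  Type *Ty = A->getType();
  if (B.getIsFPConstrained()) {
    Function *F = Intrinsic::getDeclaration(
        &M, Intrinsic::experimental_constrained_fmuladd, {Ty});
    return B.CreateConstrainedFPCall(F, {A, X, C});
  }
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::fmuladd, {Ty});
  return B.CreateCall(F, {A, X, C});
}
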
@@ -148,3 +148,15 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
// CHECK: declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata)
};
#pragma STDC FP_CONTRACT ON
void bar(float f) {
f * f + f;
(double)f * f - f;
(long double)-f * f + f;
// CHECK: call float @llvm.experimental.constrained.fmuladd.f32
// CHECK: fneg
// CHECK: call double @llvm.experimental.constrained.fmuladd.f64
// CHECK: fneg
// CHECK: call x86_fp80 @llvm.experimental.constrained.fmuladd.f80
};
@@ -16141,6 +16141,69 @@ if either operand is a SNAN. The signaling comparison operation
performed by '``llvm.experimental.constrained.fcmps``' will raise an
exception if either operand is a NAN (QNAN or SNAN).
 
'``llvm.experimental.constrained.fmuladd``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fmuladd(<type> <op1>, <type> <op2>,
<type> <op3>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fmuladd``' intrinsic represents
multiply-add expressions that can be fused if the code generator determines
that (a) the target instruction set has support for a fused operation,
and (b) that the fused operation is more efficient than the equivalent,
separate pair of mul and add instructions.
Arguments:
""""""""""
The first three arguments to the '``llvm.experimental.constrained.fmuladd``'
intrinsic must be floating-point or vector of floating-point values.
All three arguments must have identical types.
The fourth and fifth arguments specify the rounding mode and exception behavior
as described above.
Semantics:
""""""""""
The expression:
::
%0 = call float @llvm.experimental.constrained.fmuladd.f32(%a, %b, %c,
metadata <rounding mode>,
metadata <exception behavior>)
is equivalent to the expression:
::
%0 = call float @llvm.experimental.constrained.fmul.f32(%a, %b,
metadata <rounding mode>,
metadata <exception behavior>)
%1 = call float @llvm.experimental.constrained.fadd.f32(%0, %c,
metadata <rounding mode>,
metadata <exception behavior>)
except that it is unspecified whether rounding will be performed between the
multiplication and addition steps. Fusion is not guaranteed, even if the target
platform supports it.
If a fused multiply-add is required, the corresponding
:ref:`llvm.experimental.constrained.fma <int_fma>` intrinsic function should be
used instead.
Like '``llvm.experimental.constrained.fma.*``', this intrinsic never sets errno.
Constrained libm-equivalent Intrinsics
--------------------------------------
 
......
@@ -1288,6 +1288,9 @@ public:
case Intrinsic::fmuladd:
ISDs.push_back(ISD::FMA);
break;
case Intrinsic::experimental_constrained_fmuladd:
ISDs.push_back(ISD::STRICT_FMA);
break;
// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
@@ -1511,6 +1514,12 @@ public:
if (IID == Intrinsic::fmuladd)
return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
if (IID == Intrinsic::experimental_constrained_fmuladd)
return ConcreteTTI->getIntrinsicCost(
Intrinsic::experimental_constrained_fmul, RetTy, Tys,
nullptr) +
ConcreteTTI->getIntrinsicCost(
Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
......
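
The cost-model addition above prices a constrained fmuladd that is not handled as a single STRICT_FMA node as the sum of its unfused parts. A toy model of the rule (names are illustrative, not the TTI interface):

// Toy model of the new cost rule: fused cost if the target handles
// STRICT_FMA, otherwise constrained fmul + constrained fadd.
unsigned constrainedFMulAddCost(bool HasStrictFMA, unsigned StrictFMACost,
                                unsigned FMulCost, unsigned FAddCost) {
  return HasStrictFMA ? StrictFMACost : FMulCost + FAddCost;
}
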
@@ -95,6 +95,10 @@ DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN)
DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT)
DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC)
// This is the definition for the fmuladd intrinsic function, which is
// converted into constrained FMA or FMUL + FADD intrinsics.
FUNCTION(fmuladd, 3, 1, experimental_constrained_fmuladd)
#undef INSTRUCTION
#undef FUNCTION
#undef CMP_INSTRUCTION
......
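
ConstrainedOps.def is an X-macro table: DAG_FUNCTION entries map one-to-one onto STRICT_* SelectionDAG opcodes, while plain FUNCTION entries such as the new fmuladd line get custom handling (see the SelectionDAGBuilder change below). A sketch of how a consumer includes the table; this particular consumer is hypothetical:

#include "llvm/IR/Intrinsics.h"

// Define only the macro you care about; ConstrainedOps.def supplies empty
// defaults for the others and #undefs everything at the end of the file.
static bool hasOneToOneStrictNode(unsigned IID) {
  switch (IID) {
#define DAG_FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)                  \
  case llvm::Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    return true;  // e.g. experimental_constrained_sqrt -> STRICT_FSQRT
  default:
    return false; // e.g. experimental_constrained_fmuladd (FUNCTION entry)
  }
}
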
@@ -640,6 +640,13 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fmuladd : Intrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
......
@@ -7021,6 +7021,35 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (EB) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
};
SDVTList VTs = DAG.getVTList(ValueVTs);
fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -7029,6 +7058,23 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
case Intrinsic::experimental_constrained_fmuladd: {
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
ValueVTs[0])) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers);
pushOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
Opers.push_back(Mul.getValue(0));
Opers.push_back(getValue(FPI.getArgOperand(2)));
}
break;
}
}
// A few strict DAG nodes carry additional operands that are not
@@ -7047,32 +7093,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
}
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (FPI.getExceptionBehavior().getValue()) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instruction that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
......
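
To summarize the new lowering path: fmuladd becomes a single STRICT_FMA only when fusion is permitted (AllowFPOpFusion is not FPOpFusion::Strict) and the target reports FMA as faster; otherwise it is split, with the fmul's output chain threaded into the fadd so exception-state ordering is preserved. A stand-alone toy model of that decision and operand order (illustrative strings, not SelectionDAG code):

#include <string>
#include <vector>

// Strict DAG nodes take (chain, operands...) and produce (value, out-chain);
// in the split case the fadd consumes the fmul's out-chain and value.
std::vector<std::string> lowerConstrainedFMulAdd(bool FusionAllowed,
                                                 bool FMAFaster) {
  if (FusionAllowed && FMAFaster)
    return {"res:ch1 = STRICT_FMA ch0, a, b, c"};
  return {"mul:ch1 = STRICT_FMUL ch0, a, b",
          "res:ch2 = STRICT_FADD ch1, mul, c"};
}
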
@@ -322,6 +322,128 @@ entry:
ret double %result
}
; Verify constrained fmul and fadd aren't fused.
define float @f11(float %0, float %1, float %2) #0 {
; NOFMA-LABEL: f11:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: mulss %xmm1, %xmm0
; NOFMA-NEXT: addss %xmm2, %xmm0
; NOFMA-NEXT: retq
;
; FMA-LABEL: f11:
; FMA: # %bb.0: # %entry
; FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
; FMA-NEXT: vaddss %xmm2, %xmm0, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: f11:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmulss %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vaddss %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
%3 = call float @llvm.experimental.constrained.fmul.f32(float %0, float %1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%4 = call float @llvm.experimental.constrained.fadd.f32(float %3, float %2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %4
}
; Verify constrained fmul and fadd aren't fused.
define double @f12(double %0, double %1, double %2) #0 {
; NOFMA-LABEL: f12:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: mulsd %xmm1, %xmm0
; NOFMA-NEXT: addsd %xmm2, %xmm0
; NOFMA-NEXT: retq
;
; FMA-LABEL: f12:
; FMA: # %bb.0: # %entry
; FMA-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; FMA-NEXT: vaddsd %xmm2, %xmm0, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: f12:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vaddsd %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
%3 = call double @llvm.experimental.constrained.fmul.f64(double %0, double %1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%4 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %2,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %4
}
; Verify that fmuladd(3.5) isn't simplified when the rounding mode is
; unknown.
define float @f15() #0 {
; NOFMA-LABEL: f15:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOFMA-NEXT: movaps %xmm1, %xmm0
; NOFMA-NEXT: mulss %xmm1, %xmm0
; NOFMA-NEXT: addss %xmm1, %xmm0
; NOFMA-NEXT: retq
;
; FMA-LABEL: f15:
; FMA: # %bb.0: # %entry
; FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: f15:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fmuladd.f32(
float 3.5,
float 3.5,
float 3.5,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; Verify that fmuladd(42.1) isn't simplified when the rounding mode is
; unknown.
define double @f16() #0 {
; NOFMA-LABEL: f16:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; NOFMA-NEXT: movapd %xmm1, %xmm0
; NOFMA-NEXT: mulsd %xmm1, %xmm0
; NOFMA-NEXT: addsd %xmm1, %xmm0
; NOFMA-NEXT: retq
;
; FMA-LABEL: f16:
; FMA: # %bb.0: # %entry
; FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: f16:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fmuladd.f64(
double 42.1,
double 42.1,
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that fma(3.5) isn't simplified when the rounding mode is
; unknown.
define float @f17() #0 {
@@ -954,7 +1076,13 @@ entry:
attributes #0 = { strictfp }
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)