Merging r276435: (2ec706fe) · Commits · llvm-doe / llvm-project

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

+2 −1

Original line number	Diff line number	Diff line
		@@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
		int PI = 0x40490fdb;
		int TWO_PI_INV = 0x3e22f983;
		int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
		int FP32_NEG_ONE = 0xbf800000;
		int FP32_ONE = 0x3f800000;
		int FP32_NEG_ONE = 0xbf800000;
		int FP64_ONE = 0x3ff0000000000000;
		int FP64_NEG_ONE = 0xbff0000000000000;
		}
		def CONST : Constants;

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

+20 −5

Original line number	Diff line number	Diff line
		@@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
		setOperationAction(ISD::SETCC, MVT::i32, Expand);
		setOperationAction(ISD::SETCC, MVT::f32, Expand);
		setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
		setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
		setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
		setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

		@@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
		return;
		case ISD::FP_TO_UINT:
		if (N->getValueType(0) == MVT::i1) {
		Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
		Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
		return;
		}
		// Fall-through. Since we don't care about out of bounds values
		// we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
		// considers some extra cases which are not necessary here.
		case ISD::FP_TO_SINT: {
		if (N->getValueType(0) == MVT::i1) {
		Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
		return;
		}

		SDValue Result;
		if (expandFP_TO_SINT(N, Result, DAG))
		Results.push_back(Result);
		@@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
		return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
		}

		SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
		SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
		SDLoc DL(Op);
		return DAG.getNode(
		ISD::SETCC,
		DL,
		MVT::i1,
		Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
		DAG.getCondCode(ISD::SETNE)
		);
		Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
		DAG.getCondCode(ISD::SETEQ));
		}

		/// Builds the replacement for an FP_TO_SINT whose result type is i1.
		///
		/// \param Op  the floating-point operand of the conversion.
		/// \param DAG the SelectionDAG in which the replacement node is created.
		/// \returns an i1 SETCC node that is true when \p Op compares equal (SETEQ)
		///          to the f32 constant -1.0.
		SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
		SDLoc DL(Op);
		return DAG.getNode(
		ISD::SETCC,
		DL,
		MVT::i1,
		// NOTE(review): -1.0 is presumably chosen because a signed i1 can only
		// represent 0 and -1; the constant is built as f32, so Op is assumed to
		// be f32 as well -- confirm against callers.
		Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
		DAG.getCondCode(ISD::SETEQ));
		}

		SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,

llvm/lib/Target/AMDGPU/R600ISelLowering.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -72,7 +72,8 @@ private:

		SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
		SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
		SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
		SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;

		SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/SIInstructions.td

+10 −0

Original line number	Diff line number	Diff line
		@@ -3391,6 +3391,16 @@ def : Pat <
		(V_CNDMASK_B32_e64 0, -1, $src), sub1)
		>;

		// Selection pattern for a floating-point to integer conversion whose result
		// is i1. The matched conversion is replaced by a single instruction, Inst,
		// that receives the constant KOne and the converted value.
		//   Inst      - instruction emitted for the match.
		//   KOne      - integer constant passed to Inst ahead of the matched source.
		//   vt        - value type of the floating-point source being converted.
		//   fp_to_int - conversion operator to match.
		// The source is matched through VOP3Mods, so its modifiers are captured and
		// forwarded to Inst as $src0_modifiers.
		// NOTE(review): the leading 0 is presumably the modifier field belonging to
		// the KOne operand -- confirm against the VOP3 operand order.
		class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
		(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
		(i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
		>;

		// Instantiations for f32 and f64 sources. fp_to_uint is paired with the
		// *_ONE constant and fp_to_sint with the *_NEG_ONE constant; each uses the
		// V_CMP_EQ instruction of the matching floating-point width.
		def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
		def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
		def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
		def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;

		// If we need to perform a logical operation on i1 values, we need to
		// use vector comparisons since there is only one SCC register. Vector
		// comparisons still write to a pair of SGPRs, so treat these as

llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll

+22 −1

Original line number	Diff line number	Diff line
		; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
		; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

		declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
		declare i32 @llvm.amdgcn.workitem.id.x() #1
		declare double @llvm.fabs.f64(double) #1

		; FUNC-LABEL: @fp_to_sint_f64_i32
		; SI: v_cvt_i32_f64_e32
		@@ -54,3 +55,23 @@ define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in
		store i64 %cast, i64 addrspace(1)* %out, align 8
		ret void
		}

		; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1:
		; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}}
		; Converts the double argument to i1 with fptosi and stores the result, so
		; that the f64 -> i1 signed conversion is exercised on its own.
		define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
		%conv = fptosi double %in to i1
		store i1 %conv, i1 addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1:
		; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}|
		; Same f64 -> i1 signed conversion, but the input first goes through
		; llvm.fabs.f64 before the fptosi.
		; NOTE(review): presumably this checks that the fabs is carried as a source
		; modifier on the compare rather than emitted separately -- confirm.
		define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
		%in.fabs = call double @llvm.fabs.f64(double %in)
		%conv = fptosi double %in.fabs to i1
		store i1 %conv, i1 addrspace(1)* %out
		ret void
		}

		attributes #0 = { nounwind }
		attributes #1 = { nounwind readnone }