[DAG] Legalize abs(x) -> smax(x,sub(0,x)) iff smax/sub are legal (0637dfe8) · Commits · llvm-doe / llvm-project

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+8 −0

Original line number	Diff line number	Diff line
		@@ -6822,6 +6822,14 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
		EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
		SDValue Op = N->getOperand(0);

		// abs(x) -> smax(x,sub(0,x))
		if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) {
		SDValue Zero = DAG.getConstant(0, dl, VT);
		Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
		DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
		return true;
		}

		// Only expand vector types if we have the appropriate vector operations.
		if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
		!isOperationLegalOrCustom(ISD::ADD, VT) ||

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

+0 −47

Original line number	Diff line number	Diff line
		@@ -806,9 +806,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
		setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
		}

		for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
		setOperationAction(ISD::ABS, VT, Custom);

		// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
		// with merges, splats, etc.
		setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
		@@ -841,11 +838,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
		setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
		setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

		// Without hasP8Altivec set, v2i64 SMAX isn't available.
		// But ABS custom lowering requires SMAX support.
		if (!Subtarget.hasP8Altivec())
		setOperationAction(ISD::ABS, MVT::v2i64, Expand);

		// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
		setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
		// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
		@@ -10886,44 +10878,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
		}
		}

		SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

		assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

		EVT VT = Op.getValueType();
		assert(VT.isVector() &&
		"Only set vector abs as custom, scalar abs shouldn't reach here!");
		assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
		VT == MVT::v16i8) &&
		"Unexpected vector element type!");
		assert((VT != MVT::v2i64 \|\| Subtarget.hasP8Altivec()) &&
		"Current subtarget doesn't support smax v2i64!");

		// For vector abs, it can be lowered to:
		// abs x
		// ==>
		// y = -x
		// smax(x, y)

		SDLoc dl(Op);
		SDValue X = Op.getOperand(0);
		SDValue Zero = DAG.getConstant(0, dl, VT);
		SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

		// SMAX patch https://reviews.llvm.org/D47332
		// hasn't landed yet, so use intrinsic first here.
		// TODO: Should use SMAX directly once SMAX patch landed
		Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
		if (VT == MVT::v2i64)
		BifID = Intrinsic::ppc_altivec_vmaxsd;
		else if (VT == MVT::v8i16)
		BifID = Intrinsic::ppc_altivec_vmaxsh;
		else if (VT == MVT::v16i8)
		BifID = Intrinsic::ppc_altivec_vmaxsb;

		return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
		}

		// Custom lowering for fpext vf32 to v2f64
		SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

		@@ -11059,7 +11013,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
		case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
		case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
		case ISD::MUL: return LowerMUL(Op, DAG);
		case ISD::ABS: return LowerABS(Op, DAG);
		case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
		case ISD::ROTL: return LowerROTL(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -1157,7 +1157,6 @@ namespace llvm {
		SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/X86/abs.ll

+3 −4

Original line number	Diff line number	Diff line
		@@ -397,10 +397,9 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
		define <8 x i16> @test_v8i16(<8 x i16> %a) nounwind {
		; SSE-LABEL: test_v8i16:
		; SSE: # %bb.0:
		; SSE-NEXT: movdqa %xmm0, %xmm1
		; SSE-NEXT: psraw $15, %xmm1
		; SSE-NEXT: paddw %xmm1, %xmm0
		; SSE-NEXT: pxor %xmm1, %xmm0
		; SSE-NEXT: pxor %xmm1, %xmm1
		; SSE-NEXT: psubw %xmm0, %xmm1
		; SSE-NEXT: pmaxsw %xmm1, %xmm0
		; SSE-NEXT: retq
		;
		; AVX-LABEL: test_v8i16:

llvm/test/CodeGen/X86/combine-abs.ll

+3 −4

Original line number	Diff line number	Diff line
		@@ -55,10 +55,9 @@ define i32 @combine_i32_abs_abs(i32 %a) {
		define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
		; SSE2-LABEL: combine_v8i16_abs_abs:
		; SSE2: # %bb.0:
		; SSE2-NEXT: movdqa %xmm0, %xmm1
		; SSE2-NEXT: psraw $15, %xmm1
		; SSE2-NEXT: paddw %xmm1, %xmm0
		; SSE2-NEXT: pxor %xmm1, %xmm0
		; SSE2-NEXT: pxor %xmm1, %xmm1
		; SSE2-NEXT: psubw %xmm0, %xmm1
		; SSE2-NEXT: pmaxsw %xmm1, %xmm0
		; SSE2-NEXT: retq
		;
		; SSE42-LABEL: combine_v8i16_abs_abs: