Unverified Commit 4266815f authored by David Green's avatar David Green Committed by GitHub
Browse files

[AArch64] Convert negative constant aarch64_neon_sshl to VASHR (#68918)

In replacing shifts by splat with constant shifts, we can handle
negative shifts by flipping the sign and using a VASHR or VLSHR.
parent 658ed58d
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -19100,8 +19100,13 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
    // left shift for positive shift amounts. Below, we only replace the current
    // node with VSHL, if this condition is met.
    // left shift for positive shift amounts. For negative shifts we can use a
    // VASHR/VLSHR as appropriate.
    if (ShiftAmount < 0) {
      Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
                                                   : AArch64ISD::VLSHR;
      ShiftAmount = -ShiftAmount;
    } else
      Opcode = AArch64ISD::VSHL;
    IsRightShift = false;
    break;
+4 −9
Original line number Diff line number Diff line
@@ -2130,9 +2130,8 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ushl.4s v0, v0, v1
; CHECK-NEXT:    ushr.4s v0, v0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -2250,9 +2249,8 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.16b v1, #254
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    sshl.16b v0, v0, v1
; CHECK-NEXT:    sshr.16b v0, v0, #2
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
@@ -2300,9 +2298,8 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    sshl.4s v0, v0, v1
; CHECK-NEXT:    sshr.4s v0, v0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -2377,10 +2374,8 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    sshl d0, d0, d1
; CHECK-NEXT:    sshr d0, d0, #1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i32, ptr %A