Unverified Commit 4266815f authored by David Green's avatar David Green Committed by GitHub
Browse files

[AArch64] Convert negative constant aarch64_neon_sshl to VASHR (#68918)

In replacing shifts by splat with constant shifts, we can handle
negative shifts by flipping the sign and using a VASHR or VLSHR.
parent 658ed58d
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -19100,8 +19100,13 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
    // left shift for positive shift amounts. Below, we only replace the current
    // node with VSHL, if this condition is met.
    // left shift for positive shift amounts. For negative shifts we can use a
    // VASHR/VLSHR as appropriate.
    if (ShiftAmount < 0) {
      Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
                                                   : AArch64ISD::VLSHR;
      ShiftAmount = -ShiftAmount;
    } else
      Opcode = AArch64ISD::VSHL;
    IsRightShift = false;
    break;
+4 −9
Original line number Diff line number Diff line
@@ -2130,9 +2130,8 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ushl.4s v0, v0, v1
; CHECK-NEXT:    ushr.4s v0, v0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -2250,9 +2249,8 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.16b v1, #254
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    sshl.16b v0, v0, v1
; CHECK-NEXT:    sshr.16b v0, v0, #2
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
@@ -2300,9 +2298,8 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    sshl.4s v0, v0, v1
; CHECK-NEXT:    sshr.4s v0, v0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -2377,10 +2374,8 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    sshl d0, d0, d1
; CHECK-NEXT:    sshr d0, d0, #1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i32, ptr %A