Commit 7212f657 authored by Jessica Paquette

[AArch64][GlobalISel] Fold G_LSHR into test bit calculation

Add support for walking through G_LSHR in `getTestBitReg`. Equivalent to the
code in `getTestBitOperand` in AArch64ISelLowering.

```
(tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
```
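
The fold is the usual shift/bit-index identity: bit b of (x >> c) is bit b + c of x, provided b + c is still a valid bit index for x. A minimal standalone C++ illustration of the identity (not LLVM code; the values are arbitrary):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0b10110;  // arbitrary value
  unsigned c = 1, b = 3; // shift amount and tested bit; b + c < 32
  // Testing bit b of (x >> c) reads the same bit as testing bit b + c of x.
  assert((((x >> c) >> b) & 1) == ((x >> (b + c)) & 1));
  return 0;
}
```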

Differential Revision: https://reviews.llvm.org/D74077
parent 96ea377e
+8 −0
@@ -1040,6 +1040,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
       break;
     }
     case TargetOpcode::G_ASHR:
+    case TargetOpcode::G_LSHR:
     case TargetOpcode::G_SHL: {
       TestReg = MI->getOperand(1).getReg();
       auto VRegAndVal =
@@ -1082,6 +1083,13 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
       if (Bit >= TestRegSize)
         Bit = TestRegSize - 1;
       break;
+    case TargetOpcode::G_LSHR:
+      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
+      if ((Bit + *C) < TestRegSize) {
+        NextReg = TestReg;
+        Bit = Bit + *C;
+      }
+      break;
     case TargetOpcode::G_XOR:
       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
       // appropriate.
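
Read in isolation, the new case boils down to the following check (a free-standing sketch, not the actual implementation; `Bit`, `C`, and `TestRegSize` mirror the variables of the surrounding LLVM function, and the helper name is hypothetical):

```cpp
#include <cstdint>
#include <optional>

// Sketch: return the adjusted bit index if the fold is legal, i.e. if
// b + c still names a bit inside the register being shifted.
std::optional<uint64_t> foldLshrTestBit(uint64_t Bit, int64_t C,
                                        uint64_t TestRegSize) {
  if (Bit + C < TestRegSize)
    return Bit + C;    // (tbz (lshr x, c), b) -> (tbz x, b+c)
  return std::nullopt; // b + c out of range: leave the G_LSHR alone
}
```

When the guard fails, `NextReg` is never set, so the walk stops and the shift is selected normally; that is what the `dont_fold_lshr` test below checks.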
+141 −0
@@ -259,3 +259,144 @@ body: |
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32 = COPY $w0
  ; CHECK:   TBNZW %copy, 4, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We should get 4 as the test bit: the G_AND mask of 8 selects bit 3, and 3 + 1 = 4.
    %fold_cst:gpr(s32) = G_CONSTANT i32 1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            fold_lshr_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr_2
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   TBNZX %copy, 32, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; We're testing an s64.
    ; 3 + 29 = 32, which is less than 64, so we can fold.
    %fold_cst:gpr(s64) = G_CONSTANT i64 29
    %fold_me:gpr(s64) = G_LSHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            dont_fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_lshr
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32 = COPY $w0
  ; CHECK:   %fold_cst:gpr32 = MOVi32imm 29
  ; CHECK:   %fold_me:gpr32 = LSRVWr %copy, %fold_cst
  ; CHECK:   TBNZW %fold_me, 3, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We're testing an s32.
    ; 3 + 29 = 32, which is not less than 32, so we don't fold.
    %fold_cst:gpr(s32) = G_CONSTANT i32 29
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            lshr_negative
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: lshr_negative
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32 = COPY $w0
  ; CHECK:   TBNZW %copy, 2, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; The shift amount is -1, which is a very large value when treated as
    ; unsigned. Since it's at least the bit width, the G_LSHR is poison, so we
    ; can still fold; the folded test bit is 3 + (-1) = 2.
    %fold_cst:gpr(s32) = G_CONSTANT i32 -1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR