Commit 609a489e authored by Jessica Paquette

[AArch64][GlobalISel] Reland SLT/SGT TBNZ optimization

The issue in the previous commits was that we swapped the LHS and RHS while
looking for the constant, before the predicate was checked. For SLT/SGT, the
constant must be on the RHS, or the optimization is invalid.

Move the swapping logic below the SLT/SGT checks and update the tests.

Original commits:

d78cefb1
a3738414
parent 6370c7c1
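
For intuition, here is a minimal standalone C++ sketch of the sign-bit
equivalences the fold relies on, and of why they only hold with the constant
on the RHS of the compare. This is an illustration, not code from the patch;
all names are made up.

// Sketch only: the equivalences behind the TB(N)Z folds.
//   x < 0   iff  the msb of x is set    -> TBNZ x, #(size - 1)
//   x > -1  iff  the msb of x is clear  -> TBZ  x, #(size - 1)
// The commuted forms (0 < x, -1 > x) are different predicates, so the
// fold must not fire after a blind operand swap.
#include <cassert>
#include <cstdint>

static bool MsbSet(int64_t X) {
  return (static_cast<uint64_t>(X) >> 63) & 1;
}

int main() {
  const int64_t Tests[] = {INT64_MIN, -2, -1, 0, 1, INT64_MAX};
  for (int64_t X : Tests) {
    assert((X < 0) == MsbSet(X));    // slt x, 0  -> TBNZ on the msb
    assert((X > -1) == !MsbSet(X));  // sgt x, -1 -> TBZ on the msb
  }
  int64_t X = 5;
  assert((0 < X) != MsbSet(X) && "commuted slt is not a sign-bit test");
  return 0;
}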
+37 −8
@@ -1222,26 +1222,55 @@ bool AArch64InstructionSelector::selectCompareBranch(
   Register LHS = CCMI->getOperand(2).getReg();
   Register RHS = CCMI->getOperand(3).getReg();
   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
-  if (!VRegAndVal)
-    std::swap(RHS, LHS);
-
   MachineIRBuilder MIB(I);
+  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+
+  // When we can emit a TB(N)Z, prefer that.
+  //
+  // Handle non-commutative condition codes first.
+  // Note that we don't want to do this when we have a G_AND because it can
+  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
+  if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
+    int64_t C = VRegAndVal->Value;
+
+    // When we have a greater-than comparison, we can just test if the msb is
+    // zero.
+    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
+      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
+      I.eraseFromParent();
+      return true;
+    }
+
+    // When we have a less than comparison, we can just test if the msb is not
+    // zero.
+    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
+      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
+      I.eraseFromParent();
+      return true;
+    }
+  }
+
+  if (!VRegAndVal) {
+    std::swap(RHS, LHS);
+    VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+    LHSMI = getDefIgnoringCopies(LHS, MRI);
+  }
 
   if (!VRegAndVal || VRegAndVal->Value != 0) {
     // If we can't select a CBZ then emit a cmp + Bcc.
     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                             CCMI->getOperand(1), MIB))
       return false;
-    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
-        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
+    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
     I.eraseFromParent();
     return true;
   }
 
-  // Try to fold things into the branch.
-  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
-  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+  // Try to emit a TB(N)Z for an eq or ne condition.
   if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
                                  MIB)) {
     I.eraseFromParent();
+151 −0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Test that we can produce a TBNZ when we have an slt compare against 0.
#
# The bit tested should be the size of the test register minus 1.
#

...
---
name:            tbnzx_slt
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: tbnzx_slt
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   TBNZX %copy, 63, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %zero:gpr(s64) = G_CONSTANT i64 0
    %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s64), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            tbnzw_slt
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: tbnzw_slt
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32 = COPY $w0
  ; CHECK:   TBNZW %copy, 31, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %zero:gpr(s32) = G_CONSTANT i32 0
    %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s32), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            no_tbnz_not_zero
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: no_tbnz_not_zero
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32sp = COPY $w0
  ; CHECK:   $wzr = SUBSWri %copy, 1, 0, implicit-def $nzcv
  ; CHECK:   Bcc 11, %bb.1, implicit $nzcv
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %one:gpr(s32) = G_CONSTANT i32 1
    %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s32), %one
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            dont_fold_and
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_and
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   $xzr = ANDSXri %copy, 8000, implicit-def $nzcv
  ; CHECK:   %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
  ; CHECK:   TBNZW %cmp, 0, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0
    %c:gpr(s64) = G_CONSTANT i64 8
    %and:gpr(s64) = G_AND %copy, %bit
    %cmp:gpr(s32) = G_ICMP intpred(slt), %and(s64), %zero
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            dont_commute
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_commute
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   %zero:gpr64 = COPY $xzr
  ; CHECK:   $xzr = SUBSXrr %zero, %copy, implicit-def $nzcv
  ; CHECK:   %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
  ; CHECK:   TBNZW %cmp, 0, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %zero:gpr(s64) = G_CONSTANT i64 0
    %cmp:gpr(s32) = G_ICMP intpred(slt), %zero, %copy(s64)
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
+151 −0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Test that we can produce a TBZ when we have an sgt compare against -1.
#
# The bit tested should be the size of the test register minus 1.
#

...
---
name:            tbzx_sgt
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: tbzx_sgt
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   TBZX %copy, 63, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %negative_one:gpr(s64) = G_CONSTANT i64 -1
    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s64), %negative_one
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            tbzw_sgt
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: tbzw_sgt
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32 = COPY $w0
  ; CHECK:   TBZW %copy, 31, %bb.1
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %negative_one:gpr(s32) = G_CONSTANT i32 -1
    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %negative_one
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            no_tbz_not_negative_one
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: no_tbz_not_negative_one
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr32sp = COPY $w0
  ; CHECK:   $wzr = SUBSWri %copy, 1, 0, implicit-def $nzcv
  ; CHECK:   Bcc 12, %bb.1, implicit $nzcv
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %one:gpr(s32) = G_CONSTANT i32 1
    %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %one
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            dont_fold_and
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_and
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   %negative_one:gpr64 = MOVi64imm -1
  ; CHECK:   %and:gpr64common = ANDXri %copy, 8000
  ; CHECK:   $xzr = SUBSXrr %and, %negative_one, implicit-def $nzcv
  ; CHECK:   Bcc 12, %bb.1, implicit $nzcv
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %negative_one:gpr(s64) = G_CONSTANT i64 -1
    %c:gpr(s64) = G_CONSTANT i64 8
    %and:gpr(s64) = G_AND %copy, %bit
    %cmp:gpr(s32) = G_ICMP intpred(sgt), %and(s64), %negative_one
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
name:            dont_commute
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_commute
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK:   %copy:gpr64 = COPY $x0
  ; CHECK:   %negative_one:gpr64 = MOVi64imm -1
  ; CHECK:   $xzr = SUBSXrr %negative_one, %copy, implicit-def $nzcv
  ; CHECK:   Bcc 12, %bb.1, implicit $nzcv
  ; CHECK:   B %bb.0
  ; CHECK: bb.1:
  ; CHECK:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %negative_one:gpr(s64) = G_CONSTANT i64 -1
    %cmp:gpr(s32) = G_ICMP intpred(sgt), %negative_one, %copy(s64)
    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
    G_BRCOND %cmp_trunc(s1), %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR