Commit 8232497c authored by Diogo Sampaio's avatar Diogo Sampaio
Browse files

[ARM][THUMB2] Allow emitting T3 types of add and sub

Summary:
This patch allows emitting Thumb2 add and sub
instructions with 12-bit immediates in the
emitT2RegPlusImmediate function.
- This is one part split out of D70680

Reviewers: eli.friedman, olista01, efriedma

Reviewed By: efriedma

Subscribers: efriedma, kristof.beyls, hiraditya, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71361
parent 4a188fdf
Loading
Loading
Loading
Loading
+33 −42
Original line number Diff line number Diff line
@@ -303,10 +303,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
      continue;
    }

    bool HasCCOut = true;
    if (BaseReg == ARM::SP) {
      // sub sp, sp, #imm7
      if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
    assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
           "Writing to SP, from other register.");

    // Try to use T1, as it is smaller
    if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
      assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
      Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
@@ -314,32 +315,23 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
          .addImm(ThisVal / 4)
          .setMIFlags(MIFlags)
          .add(predOps(ARMCC::AL));
        NumBytes = 0;
        continue;
      break;
    }
    bool HasCCOut = true;
    int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal);

      // sub rd, sp, so_imm
    Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
      if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
        NumBytes = 0;
      } else {
        // FIXME: Move this to ARMAddressingModes.h?
        unsigned RotAmt = countLeadingZeros(ThisVal);
        ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
        NumBytes &= ~ThisVal;
        assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
               "Bit extraction didn't work?");
      }
    } else {
      assert(DestReg != ARM::SP && BaseReg != ARM::SP);
      Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
      if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
    // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
    if (ImmIsT2SO != -1) {
      NumBytes = 0;
    } else if (ThisVal < 4096) {
      // Prefer T3 if we can do it in a single instruction:
      // subw rd, rn, imm12 | subw sp, sp, imm12
      Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
      HasCCOut = false;
      NumBytes = 0;
    } else {
      // Use one T2 instruction to reduce NumBytes
      // FIXME: Move this to ARMAddressingModes.h?
      unsigned RotAmt = countLeadingZeros(ThisVal);
      ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
@@ -347,7 +339,6 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
      assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
             "Bit extraction didn't work?");
    }
    }

    // Build the new ADD / SUB.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+88 −0
Original line number Diff line number Diff line
--- |
  ; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s
  ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
  ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg

  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
  target triple = "thumbv7-none-none-eabi"
  ; Test input: reserves a 4000-byte array %v and a pointer slot %s on the
  ; stack, stores the address of %v into %s, reloads it, and passes it to @bar.
  ; The value names %v and %s are referenced by the stack objects and memory
  ; operands of the MIR function further down, so they must stay in sync.
  define void @foo() #0 {
  entry:
    %v = alloca [4000 x i8], align 1
    %s = alloca i8*, align 4
    ; View the array as an i8* and round-trip that pointer through %s.
    %0 = bitcast [4000 x i8]* %v to i8*
    store i8* %0, i8** %s, align 4
    %1 = load i8*, i8** %s, align 4
    call void @bar(i8* %1)
    ret void
  }
  declare void @bar(i8*) #1
  ; Function Attrs: nounwind
  declare void @llvm.stackprotector(i8*, i8**) #2

  attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #2 = { nounwind }

  !llvm.module.flags = !{!0, !1}
  !llvm.ident = !{!2}

  !0 = !{i32 1, !"wchar_size", i32 4}
  !1 = !{i32 1, !"min_enum_size", i32 4}
  !2 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git ee219345881bdf2c144d40731f055e7b36bc8bce)"}

...
---
# Machine function for @foo, fed to the prologepilog pass by the RUN line.
# It has two local stack objects (v: 4000 bytes, s: 4 bytes), giving a
# localFrameSize of 4004. The CHECK lines at the top of the file expect the
# prologue to adjust SP with a single t2SUBri12 instruction.
name:            foo
alignment:       2
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
failedISel:      false
tracksRegLiveness: true
hasWinCFI:       false
registers:       []
liveins:         []
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    4
  adjustsStack:    true
  hasCalls:        true
  stackProtector:  ''
  maxCallFrameSize: 0
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
  localFrameSize:  4004
  savePoint:       ''
  restorePoint:    ''
fixedStack:      []
stack:
  # Object 0 is the 4000-byte array %v; object 1 is the 4-byte pointer slot %s.
  - { id: 0, name: v, type: default, offset: 0, size: 4000, alignment: 1,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      local-offset: -4000, debug-info-variable: '', debug-info-expression: '',
      debug-info-location: '' }
  - { id: 1, name: s, type: default, offset: 0, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      local-offset: -4004, debug-info-variable: '', debug-info-expression: '',
      debug-info-location: '' }
callSites:       []
constants:       []
machineFunctionInfo: {}
body:             |
  bb.0.entry:
    ; Take the address of stack object v, store it to s, reload it into r0,
    ; then call @bar with r0 as the argument and return.
    renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s)
    renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s)
    ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp
    ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
    tBX_RET 14, $noreg

...
+1 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-ios0.0.0"
; CHECK: main
; CHECK: vmov.f64
; Adjust SP for the large call
; CHECK: sub sp,
; CHECK: subw sp, sp, #3720
; Store to call frame + #8
; CHECK: vstr{{.*\[}}sp, #8]
; Don't clobber that store until the call.
+1 −2
Original line number Diff line number Diff line
@@ -118,8 +118,7 @@ body: |
    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28
    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32
    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36
    ; CHECK-NEXT: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg
    ; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
    ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg
    ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256
    ; CHECK-NEXT: $r0 = IMPLICIT_DEF
    ; CHECK-NEXT: $r1 = IMPLICIT_DEF