Commit 153a0b89 authored by Hiroshi Yamauchi
Browse files

[PGO][PGSO] Add profile guided size optimization to the X86 LEA fixup.

Differential Revision: https://reviews.llvm.org/D83330
parent 8779b114
Loading
Loading
Loading
Loading
+17 −2
Original line number Diff line number Diff line
@@ -16,8 +16,11 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
@@ -111,6 +114,12 @@ public:
        MachineFunctionProperties::Property::NoVRegs);
  }

  /// Declare the analyses this pass depends on.
  ///
  /// Both the profile summary and the (lazily computed) machine block
  /// frequency info are registered as required; runOnMachineFunction fetches
  /// them via getAnalysis<> and passes them to llvm::shouldOptimizeForSize to
  /// make a per-basic-block size-optimization decision.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    // Let the base class add its own requirements on top of ours.
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
@@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDec);
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
+1 −0
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ namespace {
    /// Declare the analyses this pass depends on and the ones it keeps valid.
    ///
    /// The profile summary and the lazy machine block frequency info are
    /// required; the lazy block frequency info is additionally marked as
    /// preserved by this pass.
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<ProfileSummaryInfoWrapperPass>();
      AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
      // NOTE(review): presumably preserved so a later pass that requires the
      // same analysis does not force it to be rebuilt -- confirm against the
      // pass pipeline.
      AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
      // Let the base class add its own requirements on top of ours.
      MachineFunctionPass::getAnalysisUsage(AU);
    }

+24 −50
Original line number Diff line number Diff line
@@ -109,31 +109,18 @@ for.end:
}

define void @foo_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: foo_pgso:
; SLOW:       # %bb.0: # %entry
; SLOW-NEXT:    xorl %ecx, %ecx
; SLOW-NEXT:    decl %ecx
; SLOW-NEXT:  .LBB4_1: # %for.body
; SLOW-NEXT:    # =>This Inner Loop Header: Depth=1
; SLOW-NEXT:    movzwl %cx, %edx
; SLOW-NEXT:    decl %ecx
; SLOW-NEXT:    cmpl %eax, %edx
; SLOW-NEXT:    jl .LBB4_1
; SLOW-NEXT:  # %bb.2: # %for.end
; SLOW-NEXT:    retl
;
; FAST-LABEL: foo_pgso:
; FAST:       # %bb.0: # %entry
; FAST-NEXT:    xorl %ecx, %ecx
; FAST-NEXT:    decl %ecx
; FAST-NEXT:  .LBB4_1: # %for.body
; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
; FAST-NEXT:    movzwl %cx, %edx
; FAST-NEXT:    addl $-1, %ecx
; FAST-NEXT:    cmpl %eax, %edx
; FAST-NEXT:    jl .LBB4_1
; FAST-NEXT:  # %bb.2: # %for.end
; FAST-NEXT:    retl
; CHECK-LABEL: foo_pgso:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    decl %ecx
; CHECK-NEXT:  .LBB4_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movzwl %cx, %edx
; CHECK-NEXT:    decl %ecx
; CHECK-NEXT:    cmpl %eax, %edx
; CHECK-NEXT:    jl .LBB4_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    retl
entry:
  br label %for.body

@@ -149,31 +136,18 @@ for.end:
}

define void @bar_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: bar_pgso:
; SLOW:       # %bb.0: # %entry
; SLOW-NEXT:    xorl %ecx, %ecx
; SLOW-NEXT:    incl %ecx
; SLOW-NEXT:  .LBB5_1: # %for.body
; SLOW-NEXT:    # =>This Inner Loop Header: Depth=1
; SLOW-NEXT:    movzwl %cx, %edx
; SLOW-NEXT:    incl %ecx
; SLOW-NEXT:    cmpl %eax, %edx
; SLOW-NEXT:    jl .LBB5_1
; SLOW-NEXT:  # %bb.2: # %for.end
; SLOW-NEXT:    retl
;
; FAST-LABEL: bar_pgso:
; FAST:       # %bb.0: # %entry
; FAST-NEXT:    xorl %ecx, %ecx
; FAST-NEXT:    incl %ecx
; FAST-NEXT:  .LBB5_1: # %for.body
; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
; FAST-NEXT:    movzwl %cx, %edx
; FAST-NEXT:    addl $1, %ecx
; FAST-NEXT:    cmpl %eax, %edx
; FAST-NEXT:    jl .LBB5_1
; FAST-NEXT:  # %bb.2: # %for.end
; FAST-NEXT:    retl
; CHECK-LABEL: bar_pgso:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    incl %ecx
; CHECK-NEXT:  .LBB5_1: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movzwl %cx, %edx
; CHECK-NEXT:    incl %ecx
; CHECK-NEXT:    cmpl %eax, %edx
; CHECK-NEXT:    jl .LBB5_1
; CHECK-NEXT:  # %bb.2: # %for.end
; CHECK-NEXT:    retl
entry:
  br label %for.body