Commit 92164cf2 authored by Sjoerd Meijer's avatar Sjoerd Meijer
Browse files

Recommit "[HardwareLoops] Optimisation remarks"

With a few things fixed:
- initialisaiton of the optimisation remark pass (this was causing the buildbot
  failures on PPC),
- a test case.

Differential Revision: https://reviews.llvm.org/D69660
parent edfb8eea
Loading
Loading
Loading
Loading
+82 −23
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -75,8 +76,44 @@ ForceGuardLoopEntry(

STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");

#ifndef NDEBUG
static void debugHWLoopFailure(const StringRef DebugMsg,
    Instruction *I) {
  dbgs() << "HWLoops: " << DebugMsg;
  if (I)
    dbgs() << ' ' << *I;
  else
    dbgs() << '.';
  dbgs() << '\n';
}
#endif

static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
  Value *CodeRegion = L->getHeader();
  DebugLoc DL = L->getStartLoc();

  if (I) {
    CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, revert back to
    // using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
  R << "hardware-loop not created: ";
  return R;
}

namespace {

  void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
      OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
    LLVM_DEBUG(debugHWLoopFailure(Msg, I));
    ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
  }

  using TTI = TargetTransformInfo;

  class HardwareLoops : public FunctionPass {
@@ -97,6 +134,7 @@ namespace {
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    }

    // Try to convert the given Loop into a hardware loop.
@@ -110,6 +148,7 @@ namespace {
    ScalarEvolution *SE = nullptr;
    LoopInfo *LI = nullptr;
    const DataLayout *DL = nullptr;
    OptimizationRemarkEmitter *ORE = nullptr;
    const TargetTransformInfo *TTI = nullptr;
    DominatorTree *DT = nullptr;
    bool PreserveLCSSA = false;
@@ -143,8 +182,9 @@ namespace {

  public:
    HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
                 const DataLayout &DL) :
      SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
                 const DataLayout &DL,
                 OptimizationRemarkEmitter *ORE) :
      SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
      ExitCount(Info.ExitCount),
      CountType(Info.CountType),
      ExitBranch(Info.ExitBranch),
@@ -157,6 +197,7 @@ namespace {
  private:
    ScalarEvolution &SE;
    const DataLayout &DL;
    OptimizationRemarkEmitter *ORE = nullptr;
    Loop *L                 = nullptr;
    Module *M               = nullptr;
    const SCEV *ExitCount   = nullptr;
@@ -182,6 +223,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
@@ -201,16 +243,27 @@ bool HardwareLoops::runOnFunction(Function &F) {
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    if (TryConvertLoop(*I))
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    if (TryConvertLoop(*I)) {
      reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
                          ORE, L);
      return true; // Stop search.
    }
  }

  HardwareLoopInfo HWLoopInfo(L);
  if (!HWLoopInfo.canAnalyze(*LI))
  if (!HWLoopInfo.canAnalyze(*LI)) {
    reportHWLoopFailure("cannot analyze loop, irreducible control flow",
                        "HWLoopCannotAnalyze", ORE, L);
    return false;
  }

  if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
      ForceHardwareLoops) {
  if (!ForceHardwareLoops &&
      !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
    reportHWLoopFailure("it's not profitable to create a hardware-loop",
                        "HWLoopNotProfitable", ORE, L);
    return false;
  }

  // Allow overriding of the counter width and loop decrement value.
  if (CounterBitWidth.getNumOccurrences())
@@ -225,17 +278,19 @@ bool HardwareLoops::TryConvertLoop(Loop *L) {
  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}

  return false;
}

bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {

  Loop *L = HWLoopInfo.L;
  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
                                          ForceHardwareLoopPHI))
                                          ForceHardwareLoopPHI)) {
    // TODO: there can be many reasons a loop is not considered a
    // candidate, so we should let isHardwareLoopCandidate fill in the
    // reason and then report a better message here.
    reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
    return false;
  }

  assert(
      (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
@@ -249,7 +304,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
  if (!Preheader)
    return false;

  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
  HWLoop.Create();
  ++NumHWLoops;
  return true;
@@ -259,8 +314,11 @@ void HardwareLoop::Create() {
  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");

  Value *LoopCountInit = InitLoopCount();
  if (!LoopCountInit)
  if (!LoopCountInit) {
    reportHWLoopFailure("could not safely create a loop count expression",
                        "HWLoopNotSafe", ORE, L);
    return;
  }

  InsertIterationSetup(LoopCountInit);

@@ -458,6 +516,7 @@ INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)

FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+3 −0
Original line number Diff line number Diff line
@@ -52,6 +52,9 @@
; CHECK-NEXT:      Dominator Tree Construction
; CHECK-NEXT:      Natural Loop Information
; CHECK-NEXT:      Scalar Evolution Analysis
; CHECK-NEXT:      Lazy Branch Probability Analysis
; CHECK-NEXT:      Lazy Block Frequency Analysis
; CHECK-NEXT:      Optimization Remark Emitter
; CHECK-NEXT:      Hardware Loop Insertion
; CHECK-NEXT:      Scalar Evolution Analysis
; CHECK-NEXT:      Loop Pass Manager
+22 −3
Original line number Diff line number Diff line
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | \
; RUN:     FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - | \
; RUN:     FileCheck %s --check-prefix=CHECK-LLC
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
; RUN:     llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops \
; RUN:     -pass-remarks-analysis=hardware-loops  %s -S -o - 2>&1 | \
; RUN:     FileCheck %s --check-prefix=CHECK-REMARKS


; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop


; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations