Commit a9bd3d37 authored by Bjorn Pettersson's avatar Bjorn Pettersson
Browse files

[NewPM] Add ExtraVectorizerPasses support

As it looks like NewPM generally uses SimpleLoopUnswitch
instead of LoopUnswitch, this patch also uses SimpleLoopUnswitch
in the ExtraVectorizerPasses sequence (whereas LegacyPM
uses the LoopUnswitch pass).

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D95457
parent 5f1d4d47
Loading
Loading
Loading
Loading
+28 −1
Original line number Diff line number Diff line
@@ -287,6 +287,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
  MergeFunctions = false;
  UniqueLinkageNames = false;
}
extern cl::opt<bool> ExtraVectorizerPasses;

extern cl::opt<bool> EnableConstraintElimination;
extern cl::opt<bool> EnableGVNHoist;
@@ -1255,6 +1256,28 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
  // Cleanup after the loop optimization passes.
  OptimizePM.addPass(InstCombinePass());

  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
    // At higher optimization levels, try to clean up any runtime overlap and
    // alignment checks inserted by the vectorizer. We want to track correlated
    // runtime checks for two inner loops in the same outer loop, fold any
    // common computations, hoist loop-invariant aspects out of any outer loop,
    // and unswitch the runtime checks if possible. Once hoisted, we may have
    // dead (or speculatable) control flows or more combining opportunities.
    OptimizePM.addPass(EarlyCSEPass());
    OptimizePM.addPass(CorrelatedValuePropagationPass());
    OptimizePM.addPass(InstCombinePass());
    LoopPassManager LPM(DebugLogging);
    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
    LPM.addPass(
        SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
    OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
        std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
        DebugLogging));
    OptimizePM.addPass(SimplifyCFGPass());
    OptimizePM.addPass(InstCombinePass());
  }

  // Now that we've formed fast to execute loop structures, we do further
  // optimizations. These are run afterward as they might block doing complex
  // analyses and transforms such as what are needed for loop vectorization.
@@ -1274,8 +1297,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                         .sinkCommonInsts(true)));

  // Optimize parallel scalar instruction chains into SIMD instructions.
  if (PTO.SLPVectorization)
  if (PTO.SLPVectorization) {
    OptimizePM.addPass(SLPVectorizerPass());
    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
      OptimizePM.addPass(EarlyCSEPass());
    }
  }

  // Enhance/cleanup vector code.
  OptimizePM.addPass(VectorCombinePass());
+1 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ UseGVNAfterVectorization("use-gvn-after-vectorization",
  cl::init(false), cl::Hidden,
  cl::desc("Run GVN instead of Early CSE after vectorization passes"));

static cl::opt<bool> ExtraVectorizerPasses(
cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization."));

+24 −2
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0       -debug-pass=Structure  < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O1>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2_EXTRA

; REQUIRES: asserts

@@ -64,6 +65,27 @@
; NEWPM_O2:        Running pass: SLPVectorizerPass
; NEWPM_O2:        Running pass: VectorCombinePass

define void @f() {
  ret void
; NEWPM_O2_EXTRA-LABEL: Running pass: LoopVectorizePass
; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
; NEWPM_O2_EXTRA: Running pass: CorrelatedValuePropagationPass
; NEWPM_O2_EXTRA: Running pass: InstCombinePass
; NEWPM_O2_EXTRA: Running pass: LICMPass
; NEWPM_O2_EXTRA: Running pass: SimpleLoopUnswitchPass
; NEWPM_O2_EXTRA: Running pass: SimplifyCFGPass
; NEWPM_O2_EXTRA: Running pass: InstCombinePass
; NEWPM_O2_EXTRA: Running pass: SLPVectorizerPass
; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
; NEWPM_O2_EXTRA: Running pass: VectorCombinePass

; Test input: a function containing one single-block loop.
; %i starts at 0 and is incremented by 1 on each trip through %loop; the
; back edge is taken while %cond is true, and the value of %i from the last
; iteration is returned from %exit.
; NOTE(review): presumably the loop exists so that loop passes (LICM,
; SimpleLoopUnswitch) have something to run on - confirm against the checks.
define i64 @f(i1 %cond) {
entry:
  br label %loop

loop:
  ; Induction variable: 0 when entered from %entry, %inc on the back edge.
  %i = phi i64 [ 0, %entry ], [ %inc, %loop ]
  %inc = add i64 %i, 1
  ; %cond is a function argument and is never redefined inside the loop,
  ; so the exit condition is loop-invariant.
  br i1 %cond, label %loop, label %exit

exit:
  ; Returns the phi value, not the incremented %inc.
  ret i64 %i
}