Commit 0dc6c249 authored by cdevadas
Browse files

[AMDGPU] Invert the handling of skip insertion.

The current implementation of skip insertion (SIInsertSkips) makes it a
mandatory pass required for correctness. Initially, the idea was to
have an optional pass. This patch inserts the s_cbranch_execz upfront
during SILowerControlFlow to skip over the sections of code when no
lanes are active. Later, SIRemoveShortExecBranches removes the skips
for short branches, unless there is a side effect and the skip branch is
really necessary.

This new pass will replace the handling of skip insertion in the
existing SIInsertSkips pass.

Differential revision: https://reviews.llvm.org/D68092
parent 064859bd
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -156,6 +156,9 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;

void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
extern char &SIRemoveShortExecBranchesID;

void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;

+2 −0
Original line number Diff line number Diff line
@@ -228,6 +228,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
  initializeSIModeRegisterPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIRemoveShortExecBranchesPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIMemoryLegalizerPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
@@ -993,6 +994,7 @@ void GCNPassConfig::addPreEmitPass() {
  // be better for it to emit S_NOP <N> when possible.
  addPass(&PostRAHazardRecognizerID);

  addPass(&SIRemoveShortExecBranchesID);
  addPass(&SIInsertSkipsPassID);
  addPass(&BranchRelaxationPassID);
}
+1 −0
Original line number Diff line number Diff line
@@ -116,6 +116,7 @@ add_llvm_target(AMDGPUCodeGen
  SIOptimizeExecMaskingPreRA.cpp
  SIPeepholeSDWA.cpp
  SIRegisterInfo.cpp
  SIRemoveShortExecBranches.cpp
  SIShrinkInstructions.cpp
  SIWholeQuadMode.cpp
  GCNILPSched.cpp
+4 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"

static cl::opt<unsigned> SkipThresholdFlag(
  "amdgpu-skip-threshold",
  "amdgpu-skip-threshold-legacy",
  cl::desc("Number of instructions before jumping over divergent control flow"),
  cl::init(12), cl::Hidden);

@@ -466,6 +466,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      case AMDGPU::S_CBRANCH_EXECZ:
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        break;
      case AMDGPU::SI_MASK_BRANCH:
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        MadeChange |= skipMaskBranch(MI, MBB);
+5 −5
Original line number Diff line number Diff line
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
    BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
    .addReg(Tmp, RegState::Kill);

  // Insert a pseudo terminator to help keep the verifier happy. This will also
  // be used later when inserting skips.
  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
  // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
  // during SIRemoveShortExecBranches.
  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
                            .add(MI.getOperand(2));

  if (!LIS) {
@@ -323,7 +323,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
    .addReg(DstReg);

  MachineInstr *Branch =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
          .addMBB(DestBB);

  if (!LIS) {
Loading