[AMDGPU] Invert the handling of skip insertion. (0dc6c249) · Commits · llvm-doe / llvm-project

llvm/lib/Target/AMDGPU/AMDGPU.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -156,6 +156,9 @@ extern char &SIWholeQuadModeID;
		void initializeSILowerControlFlowPass(PassRegistry &);
		extern char &SILowerControlFlowID;

		void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
		extern char &SIRemoveShortExecBranchesID;

		void initializeSIInsertSkipsPass(PassRegistry &);
		extern char &SIInsertSkipsPassID;

+2 −0

Original line number	Diff line number	Diff line
		@@ -228,6 +228,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
		initializeSIModeRegisterPass(*PR);
		initializeSIWholeQuadModePass(*PR);
		initializeSILowerControlFlowPass(*PR);
		initializeSIRemoveShortExecBranchesPass(*PR);
		initializeSIInsertSkipsPass(*PR);
		initializeSIMemoryLegalizerPass(*PR);
		initializeSIOptimizeExecMaskingPass(*PR);
		@@ -993,6 +994,7 @@ void GCNPassConfig::addPreEmitPass() {
		// be better for it to emit S_NOP <N> when possible.
		addPass(&PostRAHazardRecognizerID);

		addPass(&SIRemoveShortExecBranchesID);
		addPass(&SIInsertSkipsPassID);
		addPass(&BranchRelaxationPassID);
		}

+1 −0

Original line number	Diff line number	Diff line
		@@ -116,6 +116,7 @@ add_llvm_target(AMDGPUCodeGen
		SIOptimizeExecMaskingPreRA.cpp
		SIPeepholeSDWA.cpp
		SIRegisterInfo.cpp
		SIRemoveShortExecBranches.cpp
		SIShrinkInstructions.cpp
		SIWholeQuadMode.cpp
		GCNILPSched.cpp

+4 −1

Original line number	Diff line number	Diff line
		@@ -41,7 +41,7 @@ using namespace llvm;
		#define DEBUG_TYPE "si-insert-skips"

		static cl::opt<unsigned> SkipThresholdFlag(
		"amdgpu-skip-threshold",
		"amdgpu-skip-threshold-legacy",
		cl::desc("Number of instructions before jumping over divergent control flow"),
		cl::init(12), cl::Hidden);

		@@ -466,6 +466,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
		MachineInstr &MI = *I;

		switch (MI.getOpcode()) {
		case AMDGPU::S_CBRANCH_EXECZ:
		ExecBranchStack.push_back(MI.getOperand(0).getMBB());
		break;
		case AMDGPU::SI_MASK_BRANCH:
		ExecBranchStack.push_back(MI.getOperand(0).getMBB());
		MadeChange |= skipMaskBranch(MI, MBB);

+5 −5

Original line number	Diff line number	Diff line
		@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
		BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
		.addReg(Tmp, RegState::Kill);

		// Insert a pseudo terminator to help keep the verifier happy. This will also
		// be used later when inserting skips.
		MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
		// Insert the S_CBRANCH_EXECZ instruction which will be optimized later
		// during SIRemoveShortExecBranches.
		MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
		.add(MI.getOperand(2));

		if (!LIS) {
		@@ -323,7 +323,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
		.addReg(DstReg);

		MachineInstr *Branch =
		BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
		BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
		.addMBB(DestBB);

		if (!LIS) {