llvm/lib/Target/X86/X86FlagsCopyLowering.cpp  +125 −82

@@ -36,6 +36,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -98,6 +99,7 @@ private:
   const X86InstrInfo *TII;
   const TargetRegisterInfo *TRI;
   const TargetRegisterClass *PromoteRC;
+  MachineDominatorTree *MDT;

   CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                   MachineInstr &CopyDefI);
@@ -145,6 +147,7 @@ FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
 char X86FlagsCopyLoweringPass::ID = 0;

 void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -342,6 +345,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   TII = Subtarget.getInstrInfo();
   TRI = Subtarget.getRegisterInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
   PromoteRC = &X86::GR8RegClass;

   if (MF.begin() == MF.end())
@@ -416,7 +420,45 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
     // of these up front instead.
     CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);

-    for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
+    // Collect the basic blocks we need to scan. Typically this will just be
+    // a single basic block but we may have to scan multiple blocks if the
+    // EFLAGS copy lives into successors.
+    SmallVector<MachineBasicBlock *, 2> Blocks;
+    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
+    Blocks.push_back(&MBB);
+    VisitedBlocks.insert(&MBB);
+    do {
+      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
+
+      // We currently don't do any PHI insertion and so we require that the
+      // test basic block dominates all of the use basic blocks.
+      //
+      // We could in theory do PHI insertion here if it becomes useful by just
+      // taking undef values in along every edge that we don't trace this
+      // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
+      // but still seems like a great deal of complexity.
+      //
+      // Because it is theoretically possible that some earlier MI pass or
+      // other lowering transformation could induce this to happen, we do
+      // a hard check even in non-debug builds here.
+      if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
+        DEBUG({
+          dbgs() << "ERROR: Encountered use that is not dominated by our test "
+                    "basic block! Rewriting this would require inserting PHI "
+                    "nodes to track the flag state across the CFG.\n\nTest "
+                    "block:\n";
+          TestMBB.dump();
+          dbgs() << "Use block:\n";
+          UseMBB.dump();
+        });
+        report_fatal_error("Cannot lower EFLAGS copy when original copy def "
+                           "does not dominate all uses.");
+      }
+
+      for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
+                                      : UseMBB.instr_begin(),
+                MIE = UseMBB.instr_end();
            MII != MIE;) {
         MachineInstr &MI = *MII++;
         MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
@@ -426,9 +468,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
         // scanning here.
         //
         // NB!!! Many instructions only modify some flags. LLVM currently
-        // models this as clobbering all flags, but if that ever changes this
-        // will need to be carefully updated to handle that more complex
-        // logic.
+        // models this as clobbering all flags, but if that ever changes
+        // this will need to be carefully updated to handle that more
+        // complex logic.
         FlagsKilled = true;
         break;
       }
@@ -454,7 +496,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
         do {
           JmpIs.push_back(&*JmpIt);
           ++JmpIt;
-        } while (JmpIt != MBB.instr_end() &&
+        } while (JmpIt != UseMBB.instr_end() &&
                  X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
                      X86::COND_INVALID);
         break;
@@ -463,12 +505,14 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
       // Otherwise we can just rewrite in-place.
       if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
         rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
-      } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
+      } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
+                 X86::COND_INVALID) {
         rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
       } else if (MI.getOpcode() == TargetOpcode::COPY) {
         rewriteCopy(MI, *FlagUse, CopyDefI);
       } else {
-        // We assume that arithmetic instructions that use flags also def them.
+        // We assume that arithmetic instructions that use flags also def
+        // them.
         assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                "Expected a def of EFLAGS for this instruction!");
@@ -489,30 +533,29 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
         break;
       }

-    // If we didn't find a kill (or equivalent) check that the flags don't
-    // live-out of the basic block. Currently we don't support lowering copies
-    // of flags that live out in this fashion.
-    if (!FlagsKilled &&
-        llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
-          return SuccMBB->isLiveIn(X86::EFLAGS);
-        })) {
-      DEBUG({
-        dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
-               << "----\n";
-        MBB.dump();
-        dbgs() << "----\n"
-               << "ERROR: Cannot lower this EFLAGS copy!\n";
-      });
-      report_fatal_error(
-          "Cannot lower EFLAGS copy that lives out of a basic block!");
-    }
+      // If the flags were killed, we're done with this block.
+      if (FlagsKilled)
+        break;
+
+      // Otherwise we need to scan successors for ones where the flags live-in
+      // and queue those up for processing.
+      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
+        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
+            VisitedBlocks.insert(SuccMBB).second)
+          Blocks.push_back(SuccMBB);
+    } while (!Blocks.empty());

     // Now rewrite the jumps that use the flags. These we handle specially
-    // because if there are multiple jumps we'll have to do surgery on the CFG.
+    // because if there are multiple jumps in a single basic block we'll have
+    // to do surgery on the CFG.
+    MachineBasicBlock *LastJmpMBB = nullptr;
     for (MachineInstr *JmpI : JmpIs) {
-      // Past the first jump we need to split the blocks apart.
-      if (JmpI != JmpIs.front())
+      // Past the first jump within a basic block we need to split the blocks
+      // apart.
+      if (JmpI->getParent() == LastJmpMBB)
         splitBlock(*JmpI->getParent(), *JmpI, *TII);
+      else
+        LastJmpMBB = JmpI->getParent();

       rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
     }

llvm/test/CodeGen/X86/O0-pipeline.ll  +1 −0

@@ -37,6 +37,7 @@
 ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
 ; CHECK-NEXT: Expand ISel Pseudo-instructions
 ; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: X86 EFLAGS copy lowering
 ; CHECK-NEXT: X86 WinAlloca Expander
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation

llvm/test/CodeGen/X86/copy-eflags.ll  +108 −0

@@ -196,3 +196,111 @@ else:
   tail call void @external_b()
   ret void
 }
+
+; Test a function that gets special select lowering into CFG with copied EFLAGS
+; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
+; cross-block rewrites in at least some narrow cases.
+define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
+; X32-LABEL: PR37100:
+; X32:       # %bb.0: # %bb
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    pushl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 20
+; X32-NEXT:    .cfi_offset %esi, -20
+; X32-NEXT:    .cfi_offset %edi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    jmp .LBB3_1
+; X32-NEXT:    .p2align 4, 0x90
+; X32-NEXT:  .LBB3_5: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    idivl %ebp
+; X32-NEXT:  .LBB3_1: # %bb1
+; X32-NEXT:    # =>This Inner Loop Header: Depth=1
+; X32-NEXT:    movsbl %cl, %eax
+; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    sarl $31, %edx
+; X32-NEXT:    cmpl %eax, %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    sbbl %edx, %eax
+; X32-NEXT:    setl %al
+; X32-NEXT:    setl %dl
+; X32-NEXT:    movzbl %dl, %ebp
+; X32-NEXT:    negl %ebp
+; X32-NEXT:    testb $-1, %al
+; X32-NEXT:    jne .LBB3_3
+; X32-NEXT:  # %bb.2: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movb %ch, %cl
+; X32-NEXT:  .LBB3_3: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movb %cl, (%ebx)
+; X32-NEXT:    movl (%edi), %edx
+; X32-NEXT:    testb $-1, %al
+; X32-NEXT:    jne .LBB3_5
+; X32-NEXT:  # %bb.4: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movl %edx, %ebp
+; X32-NEXT:    jmp .LBB3_5
+;
+; X64-LABEL: PR37100:
+; X64:       # %bb.0: # %bb
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    jmp .LBB3_1
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB3_5: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    idivl %esi
+; X64-NEXT:  .LBB3_1: # %bb1
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    movsbq %dil, %rax
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    cmpq %rax, %r10
+; X64-NEXT:    setl %sil
+; X64-NEXT:    negl %esi
+; X64-NEXT:    cmpq %rax, %r10
+; X64-NEXT:    jl .LBB3_3
+; X64-NEXT:  # %bb.2: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movl %ecx, %edi
+; X64-NEXT:  .LBB3_3: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movb %dil, (%r8)
+; X64-NEXT:    jl .LBB3_5
+; X64-NEXT:  # %bb.4: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movl (%r9), %esi
+; X64-NEXT:    jmp .LBB3_5
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
+  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
+  %tmp3 = icmp sgt i16 %tmp2, 7
+  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
+  %tmp5 = sext i8 %tmp to i64
+  %tmp6 = icmp slt i64 %arg3, %tmp5
+  %tmp7 = sext i1 %tmp6 to i32
+  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
+  store volatile i8 %tmp8, i8* %ptr1
+  %tmp9 = load volatile i32, i32* %ptr2
+  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
+  %tmp11 = srem i32 0, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i16
+  br label %bb1
+}
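
Note on the change above: the patch replaces the old single-block scan with a small worklist traversal of the CFG. As a reading aid, here is a minimal, self-contained C++ sketch of that traversal pattern under simplified assumptions. Block, DomOracle, and scanFlagUses are invented stand-ins for this illustration only; they are not LLVM's MachineBasicBlock, MachineDominatorTree, or the pass's real API, and the rewriting of individual flag uses is elided.

    #include <cstdio>
    #include <cstdlib>
    #include <functional>
    #include <set>
    #include <vector>

    // Toy stand-in for a machine basic block; deliberately far simpler than
    // LLVM's MachineBasicBlock.
    struct Block {
      int ID = 0;
      bool FlagsLiveIn = false;  // models SuccMBB->isLiveIn(X86::EFLAGS)
      bool KillsFlags = false;   // models finding a def/clobber of EFLAGS
      std::vector<Block *> Successors;
    };

    // Assumed dominance oracle; the real pass queries MachineDominatorTree.
    using DomOracle = std::function<bool(const Block &, const Block &)>;

    // Mirrors the do/while loop the hunk introduces: seed a worklist with the
    // copy's block, keep a visited set so loops terminate, hard-fail when a
    // use block is not dominated by the test block, and queue successors only
    // when the flags are live-in to them.
    void scanFlagUses(Block &TestBB, Block &CopyBB, const DomOracle &Dominates) {
      std::vector<Block *> Worklist{&CopyBB};
      std::set<Block *> Visited{&CopyBB};

      do {
        Block &UseBB = *Worklist.back();
        Worklist.pop_back();

        // No PHI insertion is done, so every use block must be dominated by
        // the test block; the patch makes this fatal even in release builds.
        if (&TestBB != &UseBB && !Dominates(TestBB, UseBB)) {
          std::fprintf(stderr,
                       "block %d uses flags but is not dominated by %d\n",
                       UseBB.ID, TestBB.ID);
          std::abort();  // stands in for report_fatal_error
        }

        // ... rewrite each instruction in UseBB that reads the flags here ...

        // Once an instruction fully redefines the flags, the copied value is
        // dead and the scan stops, matching the patch's break.
        if (UseBB.KillsFlags)
          break;

        // Otherwise the flags live out of this block: queue successors that
        // list them as live-in, visiting each block at most once.
        for (Block *Succ : UseBB.Successors)
          if (Succ->FlagsLiveIn && Visited.insert(Succ).second)
            Worklist.push_back(Succ);
      } while (!Worklist.empty());
    }

The visited set is what makes this terminate on a test like PR37100 above, where bb1 branches back to itself with the flags live across the backedge, and it is also why each reachable use block is rewritten exactly once.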