Merge r332389 to pick up the fix for PR37431, a regression w/ the new EFLAGS lowering. (8d367ae1) · Commits · llvm-doe / llvm-project

llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

+142 −3

Original line number	Diff line number	Diff line
		@@ -127,6 +127,10 @@ private:
		MachineInstr &JmpI, CondRegArray &CondRegs);
		void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
		MachineInstr &CopyDefI);
		void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
		MachineBasicBlock::iterator TestPos,
		DebugLoc TestLoc, MachineInstr &SetBI,
		MachineOperand &FlagUse, CondRegArray &CondRegs);
		void rewriteSetCC(MachineBasicBlock &TestMBB,
		MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
		MachineInstr &SetCCI, MachineOperand &FlagUse,
		@@ -511,8 +515,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
		} else if (MI.getOpcode() == TargetOpcode::COPY) {
		rewriteCopy(MI, *FlagUse, CopyDefI);
		} else {
		// We assume that arithmetic instructions that use flags also def
		// them.
		// We assume all other instructions that use flags also def them.
		assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
		"Expected a def of EFLAGS for this instruction!");

		@@ -524,7 +527,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
		// logic.
		FlagsKilled = true;

		rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
		switch (MI.getOpcode()) {
		case X86::SETB_C8r:
		case X86::SETB_C16r:
		case X86::SETB_C32r:
		case X86::SETB_C64r:
		// Use custom lowering for arithmetic that is merely extending the
		// carry flag. We model this as the SETB_C* pseudo instructions.
		rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
		CondRegs);
		break;

		default:
		// Generically handle remaining uses as arithmetic instructions.
		rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
		CondRegs);
		break;
		}
		break;
		}

		@@ -756,6 +775,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
		MI.eraseFromParent();
		}

		void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
		MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
		DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
		CondRegArray &CondRegs) {
		// This routine is only used to handle pseudos for setting a register to zero
		// or all ones based on CF. This is essentially the sign extended from 1-bit
		// form of SETB and modeled with the SETB_C* pseudos. They require special
		// handling as they aren't normal SETcc instructions and are lowered to an
		// EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
		// they are only provided in reg-defining forms. A complicating factor is that
		// they can define many different register widths.
		assert(SetBI.getOperand(0).isReg() &&
		"Cannot have a non-register defined operand to this variant of SETB!");

		// Little helper to do the common final step of replacing the register def'ed
		// by this SETB instruction with a new register and removing the SETB
		// instruction.
		auto RewriteToReg = [&](unsigned Reg) {
		MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
		SetBI.eraseFromParent();
		};

		// Grab the register class used for this particular instruction.
		auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());

		MachineBasicBlock &MBB = *SetBI.getParent();
		auto SetPos = SetBI.getIterator();
		auto SetLoc = SetBI.getDebugLoc();

		auto AdjustReg = [&](unsigned Reg) {
		auto &OrigRC = *MRI->getRegClass(Reg);
		if (&OrigRC == &SetBRC)
		return Reg;

		unsigned NewReg;

		int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
		int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
		assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
		assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
		int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
		X86::NoSubRegister, X86::sub_32bit};

		// If the original size is smaller than the target and is smaller than 4
		// bytes, we need to explicitly zero extend it. We always extend to 4-bytes
		// to maximize the chance of being able to CSE that operation and to avoid
		// partial dependency stalls extending to 2-bytes.
		if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
		NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
		BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
		.addReg(Reg);
		if (&SetBRC == &X86::GR32RegClass)
		return NewReg;
		Reg = NewReg;
		OrigRegSize = 4;
		}

		NewReg = MRI->createVirtualRegister(&SetBRC);
		if (OrigRegSize < TargetRegSize) {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
		NewReg)
		.addImm(0)
		.addReg(Reg)
		.addImm(SubRegIdx[OrigRegSize]);
		} else if (OrigRegSize > TargetRegSize) {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
		NewReg)
		.addReg(Reg)
		.addImm(SubRegIdx[TargetRegSize]);
		} else {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
		.addReg(Reg);
		}
		return NewReg;
		};

		unsigned &CondReg = CondRegs[X86::COND_B];
		if (!CondReg)
		CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);

		// Adjust the condition to have the desired register width by zero-extending
		// as needed.
		// FIXME: We should use a better API to avoid the local reference and using a
		// different variable here.
		unsigned ExtCondReg = AdjustReg(CondReg);

		// Now we need to turn this into a bitmask. We do this by subtracting it from
		// zero.
		unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
		BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
		ZeroReg = AdjustReg(ZeroReg);

		unsigned Sub;
		switch (SetBI.getOpcode()) {
		case X86::SETB_C8r:
		Sub = X86::SUB8rr;
		break;

		case X86::SETB_C16r:
		Sub = X86::SUB16rr;
		break;

		case X86::SETB_C32r:
		Sub = X86::SUB32rr;
		break;

		case X86::SETB_C64r:
		Sub = X86::SUB64rr;
		break;

		default:
		llvm_unreachable("Invalid SETB_C* opcode!");
		}
		unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
		BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
		.addReg(ZeroReg)
		.addReg(ExtCondReg);
		return RewriteToReg(ResultReg);
		}

		void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
		MachineBasicBlock::iterator TestPos,
		DebugLoc TestLoc,

llvm/test/CodeGen/X86/copy-eflags.ll

+58 −0

Original line number	Diff line number	Diff line
		@@ -304,3 +304,61 @@ bb1:
		%tmp12 = trunc i32 %tmp11 to i16
		br label %bb1
		}

		; Use a particular instruction pattern in order to lower to the post-RA pseudo
		; used to lower SETB into an SBB pattern in order to make sure that kind of
		; usage of a copied EFLAGS continues to work.
		define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
		; X32-LABEL: PR37431:
		; X32: # %bb.0: # %entry
		; X32-NEXT: pushl %esi
		; X32-NEXT: .cfi_def_cfa_offset 8
		; X32-NEXT: .cfi_offset %esi, -8
		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X32-NEXT: movl (%eax), %eax
		; X32-NEXT: movl %eax, %ecx
		; X32-NEXT: sarl $31, %ecx
		; X32-NEXT: cmpl %eax, %eax
		; X32-NEXT: sbbl %ecx, %eax
		; X32-NEXT: setb %al
		; X32-NEXT: sbbb %cl, %cl
		; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
		; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
		; X32-NEXT: movb %cl, (%edx)
		; X32-NEXT: movzbl %al, %eax
		; X32-NEXT: xorl %ecx, %ecx
		; X32-NEXT: subl %eax, %ecx
		; X32-NEXT: xorl %eax, %eax
		; X32-NEXT: xorl %edx, %edx
		; X32-NEXT: idivl %ecx
		; X32-NEXT: movb %dl, (%esi)
		; X32-NEXT: popl %esi
		; X32-NEXT: retl
		;
		; X64-LABEL: PR37431:
		; X64: # %bb.0: # %entry
		; X64-NEXT: movq %rdx, %rcx
		; X64-NEXT: movslq (%rdi), %rax
		; X64-NEXT: cmpq %rax, %rax
		; X64-NEXT: sbbb %dl, %dl
		; X64-NEXT: cmpq %rax, %rax
		; X64-NEXT: movb %dl, (%rsi)
		; X64-NEXT: sbbl %esi, %esi
		; X64-NEXT: xorl %eax, %eax
		; X64-NEXT: xorl %edx, %edx
		; X64-NEXT: idivl %esi
		; X64-NEXT: movb %dl, (%rcx)
		; X64-NEXT: retq
		entry:
		%tmp = load i32, i32* %arg1
		%tmp1 = sext i32 %tmp to i64
		%tmp2 = icmp ugt i64 %tmp1, undef
		%tmp3 = zext i1 %tmp2 to i8
		%tmp4 = sub i8 0, %tmp3
		store i8 %tmp4, i8* %arg2
		%tmp5 = sext i8 %tmp4 to i32
		%tmp6 = srem i32 0, %tmp5
		%tmp7 = trunc i32 %tmp6 to i8
		store i8 %tmp7, i8* %arg3
		ret void
		}

llvm/test/CodeGen/X86/flags-copy-lowering.mir

+71 −0

Original line number	Diff line number	Diff line
		@@ -66,6 +66,12 @@
		call void @foo()
		ret void
		}

		define void @test_setb_c(i64 %a, i64 %b) {
		entry:
		call void @foo()
		ret void
		}
		...
		---
		name: test_branch
		@@ -482,3 +488,68 @@ body: \|
		RET 0

		...
		---
		name: test_setb_c
		# CHECK-LABEL: name: test_setb_c
		liveins:
		- { reg: '%rdi', virtual-reg: '%0' }
		- { reg: '%rsi', virtual-reg: '%1' }
		body: \|
		bb.0:
		liveins: %rdi, %rsi

		%0:gr64 = COPY %rdi
		%1:gr64 = COPY %rsi
		%2:gr64 = ADD64rr %0, %1, implicit-def %eflags
		%3:gr64 = COPY %eflags
		; CHECK-NOT: COPY{{( killed)?}} %eflags
		; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
		; CHECK-NOT: COPY{{( killed)?}} %eflags

		ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
		CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
		ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp

		%eflags = COPY %3
		%4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags
		MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
		; CHECK-NOT: %eflags =
		; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
		; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
		; CHECK-NEXT: MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

		%eflags = COPY %3
		%5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags
		MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5
		; CHECK-NOT: %eflags =
		; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
		; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
		; CHECK-NEXT: MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

		%eflags = COPY %3
		%6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags
		MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6
		; CHECK-NOT: %eflags =
		; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
		; CHECK-NEXT: MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

		%eflags = COPY %3
		%7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags
		MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7
		; CHECK-NOT: %eflags =
		; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
		; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
		; CHECK-NEXT: MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

		RET 0

		...