Reapply "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (ba385ae2) · Commits · llvm-doe / llvm-project

llvm/lib/CodeGen/RegisterCoalescer.cpp

+42 −9

Original line number	Diff line number	Diff line
		@@ -305,7 +305,11 @@ namespace {
		/// number if it is not zero. If DstReg is a physical register and the
		/// existing subregister number of the def / use being updated is not zero,
		/// make sure to set it to the correct physical subregister.
		void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
		///
		/// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
		/// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
		void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
		bool IsSubregToReg);

		/// If the given machine operand reads only undefined lanes add an undef
		/// flag.
		@@ -1323,8 +1327,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
		if (DstReg.isPhysical()) {
		Register NewDstReg = DstReg;

		unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
		DefMI->getOperand(0).getSubReg());
		unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
		if (NewDstIdx)
		NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);

		@@ -1473,7 +1476,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
		MRI->setRegClass(DstReg, NewRC);

		// Update machine operands and add flags.
		updateRegDefsUses(DstReg, DstReg, DstIdx);
		updateRegDefsUses(DstReg, DstReg, DstIdx, false);
		NewMI.getOperand(0).setSubReg(NewIdx);
		// updateRegDefUses can add an "undef" flag to the definition, since
		// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
		@@ -1788,7 +1791,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
		}

		void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
		unsigned SubIdx) {
		unsigned SubIdx, bool IsSubregToReg) {
		bool DstIsPhys = DstReg.isPhysical();
		LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);

		@@ -1828,6 +1831,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
		if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
		Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));

		bool FullDef = true;

		// Replace SrcReg with DstReg in all UseMI operands.
		for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
		MachineOperand &MO = UseMI->getOperand(Ops[i]);
		@@ -1835,9 +1840,13 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
		// Adjust <undef> flags in case of sub-register joins. We don't want to
		// turn a full def into a read-modify-write sub-register def and vice
		// versa.
		if (SubIdx && MO.isDef())
		if (SubIdx && MO.isDef()) {
		MO.setIsUndef(!Reads);

		if (!Reads)
		FullDef = false;
		}

		// A subreg use of a partially undef (super) register may be a complete
		// undef use now and then has to be marked that way.
		if (MO.isUse() && !DstIsPhys) {
		@@ -1869,6 +1878,25 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
		MO.substVirtReg(DstReg, SubIdx, *TRI);
		}

		if (IsSubregToReg && !FullDef) {
		// If the coalesed instruction doesn't fully define the register, we need
		// to preserve the original super register liveness for SUBREG_TO_REG.
		//
		// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
		// but it introduces liveness for other subregisters. Downstream users may
		// have been relying on those bits, so we need to ensure their liveness is
		// captured with a def of other lanes.

		// FIXME: Need to add new subrange if tracking subranges. We could also
		// skip adding this if we knew the other lanes are dead, and only for
		// other lanes.

		assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
		"this should update subranges");
		MachineInstrBuilder MIB(*MF, UseMI);
		MIB.addReg(DstReg, RegState::ImplicitDefine);
		}

		LLVM_DEBUG({
		dbgs() << "\t\tupdated: ";
		if (!UseMI->isDebugInstr())
		@@ -2068,6 +2096,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
		});
		}

		const bool IsSubregToReg = CopyMI->isSubregToReg();

		ShrinkMask = LaneBitmask::getNone();
		ShrinkMainRange = false;

		@@ -2135,9 +2165,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {

		// Rewrite all SrcReg operands to DstReg.
		// Also update DstReg operands to include DstIdx if it is set.
		if (CP.getDstIdx())
		updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
		updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
		if (CP.getDstIdx()) {
		assert(!IsSubregToReg && "can this happen?");
		updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
		}
		updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
		IsSubregToReg);

		// Shrink subregister ranges if necessary.
		if (ShrinkMask.any()) {

llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll

+46 −46

File changed.

Preview size limit exceeded, changes collapsed.

llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll

0 → 100644

+185 −0

Original line number	Diff line number	Diff line
		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
		; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s \| FileCheck %s

		%struct.wibble = type { %struct.wombat }
		%struct.wombat = type { %struct.ham, [3 x i8] }
		%struct.ham = type { %struct.zot }
		%struct.zot = type { %struct.blam }
		%struct.blam = type { %struct.ham.0 }
		%struct.ham.0 = type { %struct.bar }
		%struct.bar = type { %struct.bar.1 }
		%struct.bar.1 = type { %struct.baz, i8 }
		%struct.baz = type { %struct.snork }
		%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
		%struct.spam = type { %struct.snork.2, %struct.snork.2 }
		%struct.snork.2 = type { i32 }
		%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }

		define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
		; CHECK-LABEL: foo:
		; CHECK: # %bb.0: # %bb
		; CHECK-NEXT: pushq %rbp
		; CHECK-NEXT: .cfi_def_cfa_offset 16
		; CHECK-NEXT: .cfi_offset %rbp, -16
		; CHECK-NEXT: movq %rsp, %rbp
		; CHECK-NEXT: .cfi_def_cfa_register %rbp
		; CHECK-NEXT: pushq %r15
		; CHECK-NEXT: pushq %r14
		; CHECK-NEXT: pushq %r13
		; CHECK-NEXT: pushq %r12
		; CHECK-NEXT: pushq %rbx
		; CHECK-NEXT: subq $24, %rsp
		; CHECK-NEXT: .cfi_offset %rbx, -56
		; CHECK-NEXT: .cfi_offset %r12, -48
		; CHECK-NEXT: .cfi_offset %r13, -40
		; CHECK-NEXT: .cfi_offset %r14, -32
		; CHECK-NEXT: .cfi_offset %r15, -24
		; CHECK-NEXT: movl %r8d, %r14d
		; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
		; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
		; CHECK-NEXT: movq %rsi, %r13
		; CHECK-NEXT: movq %rdi, %r15
		; CHECK-NEXT: incl %r14d
		; CHECK-NEXT: xorl %ebx, %ebx
		; CHECK-NEXT: # implicit-def: $r12
		; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
		; CHECK-NEXT: jmp .LBB0_3
		; CHECK-NEXT: .p2align 4, 0x90
		; CHECK-NEXT: .LBB0_1: # %bb17
		; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
		; CHECK-NEXT: movq %r15, %r13
		; CHECK-NEXT: xorl %r15d, %r15d
		; CHECK-NEXT: testq %rbx, %rbx
		; CHECK-NEXT: sete %r15b
		; CHECK-NEXT: xorl %edi, %edi
		; CHECK-NEXT: callq _Znwm@PLT
		; CHECK-NEXT: shll $4, %r15d
		; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
		; CHECK-NEXT: movq %r12, %rcx
		; CHECK-NEXT: shrq $32, %rcx
		; CHECK-NEXT: movb %cl, 12(%rax)
		; CHECK-NEXT: movl %r12d, 8(%rax)
		; CHECK-NEXT: movq %r15, %rbx
		; CHECK-NEXT: movq %r13, %r15
		; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
		; CHECK-NEXT: decl %r14d
		; CHECK-NEXT: je .LBB0_8
		; CHECK-NEXT: .LBB0_3: # %bb7
		; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
		; CHECK-NEXT: callq widget@PLT
		; CHECK-NEXT: cmpb $-5, (%r13)
		; CHECK-NEXT: jae .LBB0_5
		; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1
		; CHECK-NEXT: movl %r12d, %r12d
		; CHECK-NEXT: cmpq %r15, %rbx
		; CHECK-NEXT: jbe .LBB0_1
		; CHECK-NEXT: jmp .LBB0_7
		; CHECK-NEXT: .p2align 4, 0x90
		; CHECK-NEXT: .LBB0_5: # %bb12
		; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
		; CHECK-NEXT: movq 0, %rax
		; CHECK-NEXT: movq 8, %rax
		; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
		; CHECK-NEXT: cmpq %r15, %rbx
		; CHECK-NEXT: jbe .LBB0_1
		; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1
		; CHECK-NEXT: xorl %eax, %eax
		; CHECK-NEXT: xorl %ebx, %ebx
		; CHECK-NEXT: decl %r14d
		; CHECK-NEXT: jne .LBB0_3
		; CHECK-NEXT: .LBB0_8: # %bb21
		; CHECK-NEXT: cmpb $0, 12(%rax)
		; CHECK-NEXT: jne .LBB0_10
		; CHECK-NEXT: # %bb.9: # %bb26
		; CHECK-NEXT: addq $24, %rsp
		; CHECK-NEXT: popq %rbx
		; CHECK-NEXT: popq %r12
		; CHECK-NEXT: popq %r13
		; CHECK-NEXT: popq %r14
		; CHECK-NEXT: popq %r15
		; CHECK-NEXT: popq %rbp
		; CHECK-NEXT: .cfi_def_cfa %rsp, 8
		; CHECK-NEXT: retq
		; CHECK-NEXT: .LBB0_10: # %bb25
		; CHECK-NEXT: .cfi_def_cfa %rbp, 16
		; CHECK-NEXT: movq %r15, %rdi
		; CHECK-NEXT: callq pluto@PLT
		bb:
		br label %bb7

		bb5: ; preds = %bb17, %bb14
		%phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
		%phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
		%add = add i32 %phi9, 1
		%icmp = icmp eq i32 %phi9, %arg4
		br i1 %icmp, label %bb21, label %bb7

		bb7: ; preds = %bb5, %bb
		%phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
		%phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
		%phi10 = phi i40 [ undef, %bb ], [ %phi15, %bb5 ]
		%call = call ptr @widget()
		%load = load i8, ptr %arg1, align 8
		%icmp11 = icmp ult i8 %load, -5
		%and = and i40 %phi10, 4294967295
		br i1 %icmp11, label %bb14, label %bb12

		bb12: ; preds = %bb7
		%load13 = load volatile { i64, i64 }, ptr null, align 4294967296
		br label %bb14

		bb14: ; preds = %bb12, %bb7
		%phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
		%icmp16 = icmp ugt ptr %phi8, %arg
		br i1 %icmp16, label %bb5, label %bb17

		bb17: ; preds = %bb14
		%icmp18 = icmp eq ptr %phi8, null
		%zext = zext i1 %icmp18 to i64
		%call19 = call ptr @_Znwm(i64 0)
		%getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
		%getelementptr20 = getelementptr i8, ptr %call19, i64 8
		store i40 %phi15, ptr %getelementptr20, align 4
		br label %bb5

		bb21: ; preds = %bb5
		%getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
		%load23 = load i8, ptr %getelementptr22, align 4
		%icmp24 = icmp eq i8 %load23, 0
		br i1 %icmp24, label %bb26, label %bb25

		bb25: ; preds = %bb21
		call void @pluto(ptr %arg)
		unreachable

		bb26: ; preds = %bb21
		ret void
		}

		define void @eggs(ptr %arg, ptr %arg1) {
		; CHECK-LABEL: eggs:
		; CHECK: # %bb.0: # %bb
		; CHECK-NEXT: pushq %rax
		; CHECK-NEXT: .cfi_def_cfa_offset 16
		; CHECK-NEXT: movq %rdi, %rax
		; CHECK-NEXT: movq %rsi, %rdi
		; CHECK-NEXT: movq %rax, %rsi
		; CHECK-NEXT: xorl %edx, %edx
		; CHECK-NEXT: xorl %ecx, %ecx
		; CHECK-NEXT: xorl %r8d, %r8d
		; CHECK-NEXT: callq foo@PLT
		; CHECK-NEXT: popq %rax
		; CHECK-NEXT: .cfi_def_cfa_offset 8
		; CHECK-NEXT: retq
		bb:
		call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0)
		ret void
		}

		declare ptr @widget()

		declare void @pluto(ptr)

		declare ptr @_Znwm(i64)

		attributes #0 = { noinline "frame-pointer"="all" }

llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir

0 → 100644

+47 −0

Original line number	Diff line number	Diff line
		# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
		# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s \| FileCheck %s


		# FIXME: Need to handle subrange updates when coalescing with subreg_to_reg
		# This will fail if x86 enables subregister liveness.
		---
		name: requires_new_subrange_coalesce_subreg_to_reg
		tracksRegLiveness: true
		body: \|
		; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
		; CHECK: bb.0:
		; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
		; CHECK-NEXT: liveins: $eax
		; CHECK-NEXT: {{ $}}
		; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
		; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF
		; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
		; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags
		; CHECK-NEXT: {{ $}}
		; CHECK-NEXT: bb.1:
		; CHECK-NEXT: successors: %bb.2(0x80000000)
		; CHECK-NEXT: {{ $}}
		; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
		; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a
		; CHECK-NEXT: {{ $}}
		; CHECK-NEXT: bb.2:
		; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
		; CHECK-NEXT: RET 0, implicit %c
		bb.0:
		liveins: $eax
		%init_eax:gr32 = COPY $eax
		%a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
		%b:gr32 = IMPLICIT_DEF
		%c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
		JCC_1 %bb.2, 4, implicit undef $eflags

		bb.1:
		%imm0:gr32 = MOV32r0 implicit-def dead $eflags
		%a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
		%c.sub_32bit = COPY %a

		bb.2:
		%c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
		RET 0, implicit %c

		...

llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir

0 → 100644

+348 −0

File added.

Preview size limit exceeded, changes collapsed.