Merging r321791 and r321862: (d461c802) · Commits · llvm-doe / llvm-project

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+12 −2

Original line number	Diff line number	Diff line
		@@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
		EVT ExtVT;
		if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
		isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
		// Only add this load if we can make it more narrow.
		if (ExtVT.bitsLT(Load->getMemoryVT()))

		// ZEXTLOAD is already small enough.
		if (Load->getExtensionType() == ISD::ZEXTLOAD &&
		ExtVT.bitsGE(Load->getMemoryVT()))
		continue;

		// Use LE to convert equal sized loads to zext.
		if (ExtVT.bitsLE(Load->getMemoryVT()))
		Loads.insert(Load);

		continue;
		}
		return false;
		@@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
		if (Loads.size() == 0)
		return false;

		DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
		SDValue MaskOp = N->getOperand(1);

		// If it exists, fixup the single node we allow in the tree that needs
		// masking.
		if (FixupNode) {
		DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
		SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
		FixupNode->getValueType(0),
		SDValue(FixupNode, 0), MaskOp);
		@@ -3928,6 +3937,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {

		// Create narrow loads.
		for (auto *Load : Loads) {
		DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
		SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
		SDValue(Load, 0), MaskOp);
		DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);

llvm/test/CodeGen/ARM/and-load-combine.ll

+5 −9

Original line number	Diff line number	Diff line
		@@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
		; ARM: @ %bb.0: @ %entry
		; ARM-NEXT: ldrb r0, [r0]
		; ARM-NEXT: uxtb r2, r2
		; ARM-NEXT: and r0, r0, r1
		; ARM-NEXT: uxtb r1, r0
		; ARM-NEXT: and r1, r0, r1
		; ARM-NEXT: mov r0, #0
		; ARM-NEXT: cmp r1, r2
		; ARM-NEXT: movweq r0, #1
		@@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
		; ARMEB: @ %bb.0: @ %entry
		; ARMEB-NEXT: ldrb r0, [r0]
		; ARMEB-NEXT: uxtb r2, r2
		; ARMEB-NEXT: and r0, r0, r1
		; ARMEB-NEXT: uxtb r1, r0
		; ARMEB-NEXT: and r1, r0, r1
		; ARMEB-NEXT: mov r0, #0
		; ARMEB-NEXT: cmp r1, r2
		; ARMEB-NEXT: movweq r0, #1
		@@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
		;
		; THUMB1-LABEL: test6:
		; THUMB1: @ %bb.0: @ %entry
		; THUMB1-NEXT: ldrb r0, [r0]
		; THUMB1-NEXT: ands r0, r1
		; THUMB1-NEXT: uxtb r3, r0
		; THUMB1-NEXT: ldrb r3, [r0]
		; THUMB1-NEXT: ands r3, r1
		; THUMB1-NEXT: uxtb r2, r2
		; THUMB1-NEXT: movs r0, #1
		; THUMB1-NEXT: movs r1, #0
		@@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
		; THUMB2: @ %bb.0: @ %entry
		; THUMB2-NEXT: ldrb r0, [r0]
		; THUMB2-NEXT: uxtb r2, r2
		; THUMB2-NEXT: ands r0, r1
		; THUMB2-NEXT: uxtb r1, r0
		; THUMB2-NEXT: ands r1, r0
		; THUMB2-NEXT: movs r0, #0
		; THUMB2-NEXT: cmp r1, r2
		; THUMB2-NEXT: it eq

llvm/test/CodeGen/X86/pr37563.ll

0 → 100644

+42 −0

Original line number	Diff line number	Diff line
		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
		; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s

		; Packed struct layout (the <{ ... }> form) that @PR35763 views @z through
		; via constant-expression bitcasts.
		%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }>

		; @z is the global that @PR35763 loads from and stores back to after
		; bitcasting it to %struct.S*.
		@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8
		; Destination of the zero-extended 16-bit OR result computed in @PR35763.
		@tf_3_var_136 = global i64 0, align 8
		; Format string; no instruction in this file references it.
		@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1

		; @PR35763 views @z as the packed %struct.S and does two things:
		;   1. ORs the sign-extended i16 field 0 with the i32 load at field 1 masked
		;      to its low 21 bits (2097151), keeps the low 16 bits of the result and
		;      stores them zero-extended to @tf_3_var_136.
		;   2. Loads field 2 as an i40, ANDs it with -8589869057 (0xFE0000FFFF as an
		;      i40) and stores the result back to the same location.
		; The assertions below expect both loads feeding the OR to be emitted as
		; 16-bit zero-extending loads (movzwl).
		define void @PR35763() {
		; CHECK-LABEL: PR35763:
		; CHECK: # %bb.0: # %entry
		; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
		; CHECK-NEXT: movzwl z+{{.*}}(%rip), %ecx
		; CHECK-NEXT: orl %eax, %ecx
		; CHECK-NEXT: movq %rcx, {{.*}}(%rip)
		; CHECK-NEXT: movl z+{{.*}}(%rip), %eax
		; CHECK-NEXT: movzbl z+{{.*}}(%rip), %ecx
		; CHECK-NEXT: shlq $32, %rcx
		; CHECK-NEXT: orq %rax, %rcx
		; CHECK-NEXT: movabsq $1090921758719, %rax # imm = 0xFE0000FFFF
		; CHECK-NEXT: andq %rcx, %rax
		; CHECK-NEXT: movl %eax, z+{{.*}}(%rip)
		; CHECK-NEXT: shrq $32, %rax
		; CHECK-NEXT: movb %al, z+{{.*}}(%rip)
		; CHECK-NEXT: retq
		entry:
		  %0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8
		  %conv = sext i16 %0 to i32
		  %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2
		  %bf.clear = and i32 %bf.load, 2097151
		  %bf.cast = zext i32 %bf.clear to i64
		  %conv1 = trunc i64 %bf.cast to i32
		  %or = or i32 %conv, %conv1
		  %conv2 = trunc i32 %or to i16
		  %conv3 = zext i16 %conv2 to i64
		  store i64 %conv3, i64* @tf_3_var_136, align 8
		  %bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
		  %bf.clear5 = and i40 %bf.load4, -8589869057
		  store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
		  ret void
		}