Commit d461c802 authored by Hans Wennborg
Browse files

Merging r321791 and r321862:

------------------------------------------------------------------------
r321791 | sam_parker | 2018-01-04 01:42:27 -0800 (Thu, 04 Jan 2018) | 4 lines

[X86] Codegen test for PR35763

Adding test to ease review of D41628.

------------------------------------------------------------------------

------------------------------------------------------------------------
r321862 | sam_parker | 2018-01-05 00:47:23 -0800 (Fri, 05 Jan 2018) | 10 lines

[DAGCombine] Fix for PR35763

While searching for loads to be narrowed, equal sized loads were not
added to the list, resulting in anyext loads not being converted to
zext loads.

https://bugs.llvm.org/show_bug.cgi?id=35763

Differential Revision: https://reviews.llvm.org/D41628

------------------------------------------------------------------------

llvm-svn: 322671
parent 7b7f4c7e
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
        // Only add this load if we can make it more narrow.
        if (ExtVT.bitsLT(Load->getMemoryVT()))
        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;
        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);
        continue;
      }
      return false;
@@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
    if (Loads.size() == 0)
      return false;
    DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);
    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
@@ -3928,6 +3937,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
    // Create narrow loads.
    for (auto *Load : Loads) {
      DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+5 −9
Original line number Diff line number Diff line
@@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; ARM:       @ %bb.0: @ %entry
; ARM-NEXT:    ldrb r0, [r0]
; ARM-NEXT:    uxtb r2, r2
; ARM-NEXT:    and r0, r0, r1
; ARM-NEXT:    uxtb r1, r0
; ARM-NEXT:    and r1, r0, r1
; ARM-NEXT:    mov r0, #0
; ARM-NEXT:    cmp r1, r2
; ARM-NEXT:    movweq r0, #1
@@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; ARMEB:       @ %bb.0: @ %entry
; ARMEB-NEXT:    ldrb r0, [r0]
; ARMEB-NEXT:    uxtb r2, r2
; ARMEB-NEXT:    and r0, r0, r1
; ARMEB-NEXT:    uxtb r1, r0
; ARMEB-NEXT:    and r1, r0, r1
; ARMEB-NEXT:    mov r0, #0
; ARMEB-NEXT:    cmp r1, r2
; ARMEB-NEXT:    movweq r0, #1
@@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
;
; THUMB1-LABEL: test6:
; THUMB1:       @ %bb.0: @ %entry
; THUMB1-NEXT:    ldrb r0, [r0]
; THUMB1-NEXT:    ands r0, r1
; THUMB1-NEXT:    uxtb r3, r0
; THUMB1-NEXT:    ldrb r3, [r0]
; THUMB1-NEXT:    ands r3, r1
; THUMB1-NEXT:    uxtb r2, r2
; THUMB1-NEXT:    movs r0, #1
; THUMB1-NEXT:    movs r1, #0
@@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; THUMB2:       @ %bb.0: @ %entry
; THUMB2-NEXT:    ldrb r0, [r0]
; THUMB2-NEXT:    uxtb r2, r2
; THUMB2-NEXT:    ands r0, r1
; THUMB2-NEXT:    uxtb r1, r0
; THUMB2-NEXT:    ands r1, r0
; THUMB2-NEXT:    movs r0, #0
; THUMB2-NEXT:    cmp r1, r2
; THUMB2-NEXT:    it eq
+42 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s

%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }>

@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8
@tf_3_var_136 = global i64 0, align 8
@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1

; Regression test for PR35763 (https://bugs.llvm.org/show_bug.cgi?id=35763).
; The function reads fields of @z through %struct.S-typed pointers, masks the
; loaded values with AND, and stores the results. The autogenerated assertions
; below pin the expected x86-64 output, in which the narrow loads are emitted
; as zero-extending loads (movzwl / movzbl).
define void @PR35763() {
; CHECK-LABEL: PR35763:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax
; CHECK-NEXT:    movzwl z+{{.*}}(%rip), %ecx
; CHECK-NEXT:    orl %eax, %ecx
; CHECK-NEXT:    movq %rcx, {{.*}}(%rip)
; CHECK-NEXT:    movl z+{{.*}}(%rip), %eax
; CHECK-NEXT:    movzbl z+{{.*}}(%rip), %ecx
; CHECK-NEXT:    shlq $32, %rcx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    movabsq $1090921758719, %rax # imm = 0xFE0000FFFF
; CHECK-NEXT:    andq %rcx, %rax
; CHECK-NEXT:    movl %eax, z+{{.*}}(%rip)
; CHECK-NEXT:    shrq $32, %rax
; CHECK-NEXT:    movb %al, z+{{.*}}(%rip)
; CHECK-NEXT:    retq
entry:
  ; Field 0 (i16) is loaded and sign-extended to i32.
  %0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8
  %conv = sext i16 %0 to i32
  ; Field 1 (i24) is read as an i32 through a bitcast pointer and masked to its
  ; low 21 bits (2097151 == 0x1FFFFF).
  %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2
  %bf.clear = and i32 %bf.load, 2097151
  %bf.cast = zext i32 %bf.clear to i64
  %conv1 = trunc i64 %bf.cast to i32
  ; Only the low 16 bits of the OR survive the trunc; they are zero-extended to
  ; i64 and stored to @tf_3_var_136.
  %or = or i32 %conv, %conv1
  %conv2 = trunc i32 %or to i16
  %conv3 = zext i16 %conv2 to i64
  store i64 %conv3, i64* @tf_3_var_136, align 8
  ; Field 2 ([5 x i8]) is read as an i40, masked with -8589869057 (0xFE0000FFFF
  ; as an unsigned 40-bit value), and written back to the same location.
  %bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
  %bf.clear5 = and i40 %bf.load4, -8589869057
  store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
  ret void
}