Commit 01884d24 authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r308986 and r308963:

------------------------------------------------------------------------
r308963 | rksimon | 2017-07-25 03:33:36 -0700 (Tue, 25 Jul 2017) | 1 line

[X86] Add 24-byte memcmp tests (PR33914)
------------------------------------------------------------------------

------------------------------------------------------------------------
r308986 | rksimon | 2017-07-25 10:04:37 -0700 (Tue, 25 Jul 2017) | 9 lines

[X86][CGP] Reduce memcmp() expansion to 2 load pairs (PR33914)

D35067/rL308322 attempted to support up to 4 load pairs for memcmp inlining which resulted in regressions for some optimized libc memcmp implementations (PR33914).

Until we can match these more optimal cases, this patch reduces the memcmp expansion to a maximum of 2 load pairs (which matches what we do for -Os).

This patch should be considered for the 5.0.0 release branch as well.

Differential Revision: https://reviews.llvm.org/D35830
------------------------------------------------------------------------

llvm-svn: 309127
parent 5e62a55b
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1672,8 +1672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  // TODO: These control memcmp expansion in CGP and could be raised higher, but
  // that needs to be benchmarked and balanced with the potential use of vector
  // load/store types (PR33329).
  MaxLoadsPerMemcmp = 4;
  // load/store types (PR33329, PR33914).
  MaxLoadsPerMemcmp = 2;
  MaxLoadsPerMemcmpOptSize = 2;
  // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
+87 −0
Original line number Diff line number Diff line
@@ -527,6 +527,93 @@ define i1 @length16_eq_const(i8* %X) nounwind minsize {
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind minsize {
; X86-LABEL: length24:
; X86:       # BB#0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    andl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $24, {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length24:
; X64:       # BB#0:
; X64-NEXT:    pushq $24
; X64-NEXT:    popq %rdx
; X64-NEXT:    jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind minsize {
; X86-LABEL: length24_eq:
; X86:       # BB#0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    andl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $24, {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    pushq $24
; X64-NEXT:    popq %rdx
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind minsize {
; X86-LABEL: length24_eq_const:
; X86:       # BB#0:
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    andl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $24, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $.L.str, {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq_const:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    pushq $24
; X64-NEXT:    popq %rdx
; X64-NEXT:    movl $.L.str, %esi
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind minsize {
; X86-LABEL: length32:
; X86:       # BB#0:
+76 −0
Original line number Diff line number Diff line
@@ -699,6 +699,82 @@ define i1 @length16_eq_const(i8* %X) nounwind optsize {
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length24:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length24:
; X64:       # BB#0:
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
; X86-LABEL: length24_eq:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind optsize {
; X86-LABEL: length24_eq_const:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl $.L.str
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq_const:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    movl $.L.str, %esi
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length32:
; X86:       # BB#0:
+184 −226
Original line number Diff line number Diff line
@@ -475,25 +475,14 @@ define i1 @length8_eq_const(i8* %X) nounwind {

define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12_eq:
; X86:       # BB#0: # %loadbb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    cmpl (%eax), %edx
; X86-NEXT:    jne .LBB14_1
; X86-NEXT:  # BB#2: # %loadbb1
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    cmpl 4(%eax), %edx
; X86-NEXT:    jne .LBB14_1
; X86-NEXT:  # BB#3: # %loadbb2
; X86-NEXT:    movl 8(%ecx), %edx
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    cmpl 8(%eax), %edx
; X86-NEXT:    je .LBB14_4
; X86-NEXT:  .LBB14_1: # %res_block
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:  .LBB14_4: # %endblock
; X86-NEXT:    testl %ecx, %ecx
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $12
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
@@ -520,40 +509,13 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind {

define i32 @length12(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12:
; X86:       # BB#0: # %loadbb
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %ecx
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB15_1
; X86-NEXT:  # BB#2: # %loadbb1
; X86-NEXT:    movl 4(%esi), %ecx
; X86-NEXT:    movl 4(%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB15_1
; X86-NEXT:  # BB#3: # %loadbb2
; X86-NEXT:    movl 8(%esi), %ecx
; X86-NEXT:    movl 8(%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB15_1
; X86-NEXT:  # BB#4: # %endblock
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
; X86-NEXT:  .LBB15_1: # %res_block
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    movl $-1, %ecx
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    cmovbl %ecx, %eax
; X86-NEXT:    popl %esi
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $12
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length12:
@@ -588,47 +550,13 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {

define i32 @length16(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length16:
; X86:       # BB#0: # %loadbb
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %ecx
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB16_1
; X86-NEXT:  # BB#2: # %loadbb1
; X86-NEXT:    movl 4(%esi), %ecx
; X86-NEXT:    movl 4(%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB16_1
; X86-NEXT:  # BB#3: # %loadbb2
; X86-NEXT:    movl 8(%esi), %ecx
; X86-NEXT:    movl 8(%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB16_1
; X86-NEXT:  # BB#4: # %loadbb3
; X86-NEXT:    movl 12(%esi), %ecx
; X86-NEXT:    movl 12(%eax), %edx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    jne .LBB16_1
; X86-NEXT:  # BB#5: # %endblock
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
; X86-NEXT:  .LBB16_1: # %res_block
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    movl $-1, %ecx
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    cmovbl %ecx, %eax
; X86-NEXT:    popl %esi
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $16
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length16:
@@ -660,32 +588,29 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
}

define i1 @length16_eq(i8* %x, i8* %y) nounwind {
; X86-LABEL: length16_eq:
; X86:       # BB#0: # %loadbb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    cmpl (%eax), %edx
; X86-NEXT:    jne .LBB17_1
; X86-NEXT:  # BB#2: # %loadbb1
; X86-NEXT:    movl 4(%ecx), %edx
; X86-NEXT:    cmpl 4(%eax), %edx
; X86-NEXT:    jne .LBB17_1
; X86-NEXT:  # BB#3: # %loadbb2
; X86-NEXT:    movl 8(%ecx), %edx
; X86-NEXT:    cmpl 8(%eax), %edx
; X86-NEXT:    jne .LBB17_1
; X86-NEXT:  # BB#4: # %loadbb3
; X86-NEXT:    movl 12(%ecx), %edx
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    cmpl 12(%eax), %edx
; X86-NEXT:    je .LBB17_5
; X86-NEXT:  .LBB17_1: # %res_block
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:  .LBB17_5: # %endblock
; X86-NEXT:    testl %ecx, %ecx
; X86-NEXT:    setne %al
; X86-NEXT:    retl
; X86-NOSSE-LABEL: length16_eq:
; X86-NOSSE:       # BB#0:
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    pushl $16
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    calll memcmp
; X86-NOSSE-NEXT:    addl $16, %esp
; X86-NOSSE-NEXT:    testl %eax, %eax
; X86-NOSSE-NEXT:    setne %al
; X86-NOSSE-NEXT:    retl
;
; X86-SSE2-LABEL: length16_eq:
; X86-SSE2:       # BB#0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
; X86-SSE2-NEXT:    movdqu (%eax), %xmm1
; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT:    setne %al
; X86-SSE2-NEXT:    retl
;
; X64-LABEL: length16_eq:
; X64:       # BB#0: # %loadbb
@@ -709,27 +634,27 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind {
}

define i1 @length16_eq_const(i8* %X) nounwind {
; X86-LABEL: length16_eq_const:
; X86:       # BB#0: # %loadbb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl $858927408, (%eax) # imm = 0x33323130
; X86-NEXT:    jne .LBB18_1
; X86-NEXT:  # BB#2: # %loadbb1
; X86-NEXT:    cmpl $926299444, 4(%eax) # imm = 0x37363534
; X86-NEXT:    jne .LBB18_1
; X86-NEXT:  # BB#3: # %loadbb2
; X86-NEXT:    cmpl $825243960, 8(%eax) # imm = 0x31303938
; X86-NEXT:    jne .LBB18_1
; X86-NEXT:  # BB#4: # %loadbb3
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    cmpl $892613426, 12(%eax) # imm = 0x35343332
; X86-NEXT:    je .LBB18_5
; X86-NEXT:  .LBB18_1: # %res_block
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:  .LBB18_5: # %endblock
; X86-NEXT:    testl %ecx, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
; X86-NOSSE-LABEL: length16_eq_const:
; X86-NOSSE:       # BB#0:
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    pushl $16
; X86-NOSSE-NEXT:    pushl $.L.str
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    calll memcmp
; X86-NOSSE-NEXT:    addl $16, %esp
; X86-NOSSE-NEXT:    testl %eax, %eax
; X86-NOSSE-NEXT:    sete %al
; X86-NOSSE-NEXT:    retl
;
; X86-SSE2-LABEL: length16_eq_const:
; X86-SSE2:       # BB#0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movdqu (%eax), %xmm0
; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT:    sete %al
; X86-SSE2-NEXT:    retl
;
; X64-LABEL: length16_eq_const:
; X64:       # BB#0: # %loadbb
@@ -752,6 +677,82 @@ define i1 @length16_eq_const(i8* %X) nounwind {
  ret i1 %c
}

; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914

define i32 @length24(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length24:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl
;
; X64-LABEL: length24:
; X64:       # BB#0:
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind
  ret i32 %m
}

define i1 @length24_eq(i8* %x, i8* %y) nounwind {
; X86-LABEL: length24_eq:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

define i1 @length24_eq_const(i8* %X) nounwind {
; X86-LABEL: length24_eq_const:
; X86:       # BB#0:
; X86-NEXT:    pushl $0
; X86-NEXT:    pushl $24
; X86-NEXT:    pushl $.L.str
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll memcmp
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: length24_eq_const:
; X64:       # BB#0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    movl $.L.str, %esi
; X64-NEXT:    movl $24, %edx
; X64-NEXT:    callq memcmp
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
}

define i32 @length32(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length32:
; X86:       # BB#0:
@@ -764,43 +765,9 @@ define i32 @length32(i8* %X, i8* %Y) nounwind {
; X86-NEXT:    retl
;
; X64-LABEL: length32:
; X64:       # BB#0: # %loadbb
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    movq (%rsi), %rdx
; X64-NEXT:    bswapq %rcx
; X64-NEXT:    bswapq %rdx
; X64-NEXT:    cmpq %rdx, %rcx
; X64-NEXT:    jne .LBB19_1
; X64-NEXT:  # BB#2: # %loadbb1
; X64-NEXT:    movq 8(%rdi), %rcx
; X64-NEXT:    movq 8(%rsi), %rdx
; X64-NEXT:    bswapq %rcx
; X64-NEXT:    bswapq %rdx
; X64-NEXT:    cmpq %rdx, %rcx
; X64-NEXT:    jne .LBB19_1
; X64-NEXT:  # BB#3: # %loadbb2
; X64-NEXT:    movq 16(%rdi), %rcx
; X64-NEXT:    movq 16(%rsi), %rdx
; X64-NEXT:    bswapq %rcx
; X64-NEXT:    bswapq %rdx
; X64-NEXT:    cmpq %rdx, %rcx
; X64-NEXT:    jne .LBB19_1
; X64-NEXT:  # BB#4: # %loadbb3
; X64-NEXT:    movq 24(%rdi), %rcx
; X64-NEXT:    movq 24(%rsi), %rdx
; X64-NEXT:    bswapq %rcx
; X64-NEXT:    bswapq %rdx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rdx, %rcx
; X64-NEXT:    jne .LBB19_1
; X64-NEXT:  # BB#5: # %endblock
; X64-NEXT:    retq
; X64-NEXT:  .LBB19_1: # %res_block
; X64-NEXT:    cmpq %rdx, %rcx
; X64-NEXT:    movl $-1, %ecx
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    cmovbl %ecx, %eax
; X64-NEXT:    retq
; X64:       # BB#0:
; X64-NEXT:    movl $32, %edx
; X64-NEXT:    jmp memcmp # TAILCALL
  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
  ret i32 %m
}
@@ -820,30 +787,25 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind {
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: length32_eq:
; X64:       # BB#0: # %loadbb
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    cmpq (%rsi), %rax
; X64-NEXT:    jne .LBB20_1
; X64-NEXT:  # BB#2: # %loadbb1
; X64-NEXT:    movq 8(%rdi), %rax
; X64-NEXT:    cmpq 8(%rsi), %rax
; X64-NEXT:    jne .LBB20_1
; X64-NEXT:  # BB#3: # %loadbb2
; X64-NEXT:    movq 16(%rdi), %rax
; X64-NEXT:    cmpq 16(%rsi), %rax
; X64-NEXT:    jne .LBB20_1
; X64-NEXT:  # BB#4: # %loadbb3
; X64-NEXT:    movq 24(%rdi), %rcx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq 24(%rsi), %rcx
; X64-NEXT:    je .LBB20_5
; X64-NEXT:  .LBB20_1: # %res_block
; X64-NEXT:    movl $1, %eax
; X64-NEXT:  .LBB20_5: # %endblock
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
; X64-SSE2-LABEL: length32_eq:
; X64-SSE2:       # BB#0:
; X64-SSE2-NEXT:    pushq %rax
; X64-SSE2-NEXT:    movl $32, %edx
; X64-SSE2-NEXT:    callq memcmp
; X64-SSE2-NEXT:    testl %eax, %eax
; X64-SSE2-NEXT:    sete %al
; X64-SSE2-NEXT:    popq %rcx
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: length32_eq:
; X64-AVX2:       # BB#0:
; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT:    vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT:    cmpl $-1, %eax
; X64-AVX2-NEXT:    sete %al
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
@@ -862,30 +824,26 @@ define i1 @length32_eq_const(i8* %X) nounwind {
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: length32_eq_const:
; X64:       # BB#0: # %loadbb
; X64-NEXT:    movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT:    cmpq %rax, (%rdi)
; X64-NEXT:    jne .LBB21_1
; X64-NEXT:  # BB#2: # %loadbb1
; X64-NEXT:    movabsq $3833745473465760056, %rax # imm = 0x3534333231303938
; X64-NEXT:    cmpq %rax, 8(%rdi)
; X64-NEXT:    jne .LBB21_1
; X64-NEXT:  # BB#3: # %loadbb2
; X64-NEXT:    movabsq $3689065127958034230, %rax # imm = 0x3332313039383736
; X64-NEXT:    cmpq %rax, 16(%rdi)
; X64-NEXT:    jne .LBB21_1
; X64-NEXT:  # BB#4: # %loadbb3
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movabsq $3544395820347831604, %rcx # imm = 0x3130393837363534
; X64-NEXT:    cmpq %rcx, 24(%rdi)
; X64-NEXT:    je .LBB21_5
; X64-NEXT:  .LBB21_1: # %res_block
; X64-NEXT:    movl $1, %eax
; X64-NEXT:  .LBB21_5: # %endblock
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
; X64-SSE2-LABEL: length32_eq_const:
; X64-SSE2:       # BB#0:
; X64-SSE2-NEXT:    pushq %rax
; X64-SSE2-NEXT:    movl $.L.str, %esi
; X64-SSE2-NEXT:    movl $32, %edx
; X64-SSE2-NEXT:    callq memcmp
; X64-SSE2-NEXT:    testl %eax, %eax
; X64-SSE2-NEXT:    setne %al
; X64-SSE2-NEXT:    popq %rcx
; X64-SSE2-NEXT:    retq
;
; X64-AVX2-LABEL: length32_eq_const:
; X64-AVX2:       # BB#0:
; X64-AVX2-NEXT:    vmovdqu (%rdi), %ymm0
; X64-AVX2-NEXT:    vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vpmovmskb %ymm0, %eax
; X64-AVX2-NEXT:    cmpl $-1, %eax
; X64-AVX2-NEXT:    setne %al
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind
  %c = icmp ne i32 %m, 0
  ret i1 %c
+62 −1030

File changed.

Preview size limit exceeded, changes collapsed.