Commit 57eb56b8 authored by Craig Topper's avatar Craig Topper
Browse files

[X86] Swap the 0 and the fudge factor in the constant pool for the 32-bit mode...

[X86] Swap the 0 and the fudge factor in the constant pool for the 32-bit mode i64->f32/f64/f80 uint_to_fp algorithm.

This allows us to generate better code for selecting the fixup
to load.

Previously, when the sign bit was set we had to load from offset 0, and
when it was clear we had to load from offset 4. This required a testl,
setns, zero extend, and finally a multiply by 4. By switching the offsets
we can just shift the sign bit into the lsb and multiply it by 4.
parent ab035647
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -19377,21 +19377,21 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                         MVT::i64, MMO);
  Chain = Fild.getValue(1);
  APInt FF(32, 0x5F800000ULL);
  // Check whether the sign bit is set.
  SDValue SignSet = DAG.getSetCC(
      dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
      Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
  // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
  // Build a 64 bit pair (FF, 0) in the constant pool, with FF in the hi bits.
  APInt FF(64, 0x5F80000000000000ULL);
  SDValue FudgePtr = DAG.getConstantPool(
      ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
      ConstantInt::get(*DAG.getContext(), FF), PtrVT);
  // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
  SDValue Zero = DAG.getIntPtrConstant(0, dl);
  SDValue Four = DAG.getIntPtrConstant(4, dl);
  SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four);
  SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero);
  FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
  // Load the value out, extending it from f32 to f80.
+2 −4
Original line number Diff line number Diff line
@@ -1886,11 +1886,9 @@ define <4 x float> @test_mm_cvtu64_ss(<4 x float> %__A, i64 %__B) {
; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X86-NEXT:    vmovq %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setns %cl
; X86-NEXT:    shrl $31, %eax
; X86-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+2 −4
Original line number Diff line number Diff line
@@ -34,11 +34,9 @@ define fastcc double @uint64_to_fp(i64 %X) {
; CHECK-NEXT:    subl $16, %esp
; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ecx, (%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    testl %edx, %edx
; CHECK-NEXT:    setns %al
; CHECK-NEXT:    shrl $31, %edx
; CHECK-NEXT:    fildll (%esp)
; CHECK-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; CHECK-NEXT:    fadds {{\.LCPI.*}}(,%edx,4)
; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ebp, %esp
+6 −10
Original line number Diff line number Diff line
@@ -829,11 +829,9 @@ define x86_fp80 @uitofp_fp80_i64(i64 %a0) nounwind {
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testl %ecx, %ecx
; X86-NEXT:    setns %al
; X86-NEXT:    shrl $31, %ecx
; X86-NEXT:    fildll (%esp)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
@@ -843,7 +841,7 @@ define x86_fp80 @uitofp_fp80_i64(i64 %a0) nounwind {
; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testq %rdi, %rdi
; X64-NEXT:    setns %al
; X64-NEXT:    sets %al
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fadds {{\.LCPI.*}}(,%rax,4)
; X64-NEXT:    retq
@@ -863,11 +861,9 @@ define x86_fp80 @uitofp_fp80_i64_ld(i64 *%a0) nounwind {
; X86-NEXT:    movl 4(%eax), %eax
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, (%esp)
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    setns %cl
; X86-NEXT:    shrl $31, %eax
; X86-NEXT:    fildll (%esp)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
; X86-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
@@ -878,7 +874,7 @@ define x86_fp80 @uitofp_fp80_i64_ld(i64 *%a0) nounwind {
; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    testq %rax, %rax
; X64-NEXT:    setns %cl
; X64-NEXT:    sets %cl
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fadds {{\.LCPI.*}}(,%rcx,4)
; X64-NEXT:    retq
+6 −11
Original line number Diff line number Diff line
@@ -2410,11 +2410,9 @@ define double @uifdl(i64 %x) #0 {
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT:    movl %eax, (%esp)
; X87-NEXT:    xorl %eax, %eax
; X87-NEXT:    testl %ecx, %ecx
; X87-NEXT:    setns %al
; X87-NEXT:    shrl $31, %ecx
; X87-NEXT:    fildll (%esp)
; X87-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    addl $20, %esp
@@ -2612,11 +2610,9 @@ define float @uiffl(i64 %x) #0 {
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X87-NEXT:    xorl %eax, %eax
; X87-NEXT:    testl %ecx, %ecx
; X87-NEXT:    setns %al
; X87-NEXT:    shrl $31, %ecx
; X87-NEXT:    fildll {{[0-9]+}}(%esp)
; X87-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    addl $20, %esp
@@ -2627,11 +2623,10 @@ define float @uiffl(i64 %x) #0 {
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    subl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 24
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    xorl %eax, %eax
; X86-SSE-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    setns %al
; X86-SSE-NEXT:    shrl $31, %eax
; X86-SSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
Loading