Commit 3c312953 authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r261360:

------------------------------------------------------------------------
r261360 | dim | 2016-02-19 12:14:11 -0800 (Fri, 19 Feb 2016) | 19 lines

Fix incorrect selection of AVX512 sqrt when OptForSize is on

Summary:
When optimizing for size, sqrt calls can be incorrectly selected as
AVX512 VSQRT instructions.  This is because X86InstrAVX512.td has a
`Requires<[OptForSize]>` in its `avx512_sqrt_scalar` multiclass
definition.  Even if the target does not support AVX512, the class can
apparently still be chosen, leading to an incorrect selection of
`vsqrtss`.

In PR26625, this led to an assertion: Reg >= X86::FP0 && Reg <=
X86::FP6 && "Expected FP register!", because the `vsqrtss` instruction
requires an XMM register, which is not available on i686 CPUs.

Reviewers: grosbach, resistor, joker.eph

Subscribers: spatel, emaste, llvm-commits

Differential Revision: http://reviews.llvm.org/D17414
------------------------------------------------------------------------

llvm-svn: 261367
parent 9b9dcd1e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -5896,7 +5896,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,

  def : Pat<(_.EltVT (OpNode (load addr:$src))),
            (!cast<Instruction>(NAME#SUFF#Zm)
                (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
                (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
+20 −0
Original line number Diff line number Diff line
; RUN: llc < %s -mcpu=i686 2>&1 | FileCheck %s
; PR26625

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386"

; Regression test body for PR26625: a single sqrtf call inside a function
; carrying attribute group #0. The directives below expect the x87 sequence
; (flds immediately followed by fsqrt) and forbid vsqrtss in the output that
; follows the fsqrt.
define float @x0(float %f) #0 {
entry:
  ; Tail call to the externally declared sqrtf; the call site uses attribute
  ; group #1.
  %call = tail call float @sqrtf(float %f) #1
  ret float %call
; CHECK-LABEL: x0:
; CHECK: flds
; CHECK-NEXT: fsqrt
; CHECK-NOT: vsqrtss
}

; External declaration of sqrtf (float -> float); no body is defined in this
; file, it exists only so the call in @x0 resolves.
declare float @sqrtf(float) #0

; Attribute groups: #0 is used on the @x0 definition and the sqrtf
; declaration, #1 on the call site. Both currently list the same attributes.
; NOTE(review): optsize appears to be the attribute that matters for this
; test (the fix concerns selection when optimizing for size) - confirm before
; removing it.
attributes #0 = { nounwind optsize readnone }
attributes #1 = { nounwind optsize readnone }