Commit 0203f70b authored by Tom Stellard
Browse files

Merging r360512:

------------------------------------------------------------------------
r360512 | ctopper | 2019-05-10 21:19:33 -0700 (Fri, 10 May 2019) | 5 lines

[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.

We were checking for SSE4.1 for FP types, but not integer 128-bit types.

Fixes PR41837.
------------------------------------------------------------------------

llvm-svn: 360749
parent 7c1f15e3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
-    if (IsNonTemporal && Alignment >= 16)
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
+56 −16
Original line number Diff line number Diff line
@@ -300,10 +300,20 @@ entry:
}

define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt16xi8:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt16xi8:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt16xi8:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt16xi8:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt16xi8:
; AVX:       # %bb.0: # %entry
@@ -320,10 +330,20 @@ entry:
}

define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt8xi16:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt8xi16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt8xi16:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt8xi16:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt8xi16:
; AVX:       # %bb.0: # %entry
@@ -340,10 +360,20 @@ entry:
}

define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt4xi32:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt4xi32:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt4xi32:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt4xi32:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt4xi32:
; AVX:       # %bb.0: # %entry
@@ -360,10 +390,20 @@ entry:
}

define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt2xi64:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt2xi64:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt2xi64:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt2xi64:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt2xi64:
; AVX:       # %bb.0: # %entry