Commit 378b1e60 authored by Craig Topper's avatar Craig Topper
Browse files

[X86] Assign avx512bf16 instructions to the SSEPackedSingle ExeDomain.

parent 05ff3323
Loading
Loading
Loading
Loading
+17 −13
Original line number Diff line number Diff line
@@ -12235,6 +12235,7 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
  }
}
let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
@@ -12243,6 +12244,7 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
@@ -12256,6 +12258,8 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                               X86cvtneps2bf16, X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
    }
  } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
@@ -12270,7 +12274,6 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                  f256mem:$src), 0, "intel">;
}
}
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
@@ -12352,6 +12355,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
  }
}
let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
+6 −6
Original line number Diff line number Diff line
@@ -32,9 +32,9 @@ define <32 x i16> @stack_fold_cvtne2ps2bf16_mask(<16 x float> %a0, <16 x float>
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT:    vmovaps (%rdi), %zmm2
; CHECK-NEXT:    vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload
; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1)
@@ -194,9 +194,9 @@ define <16 x i16> @stack_fold_cvtne2ps2bf16_mask_ymm(<8 x float> %a0, <8 x float
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovdqa (%rdi), %ymm2
; CHECK-NEXT:    vmovaps (%rdi), %ymm2
; CHECK-NEXT:    vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1)
@@ -361,9 +361,9 @@ define <8 x i16> @stack_fold_cvtne2ps2bf16_mask_xmm(<4 x float> %a0, <4 x float>
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovdqa (%rdi), %xmm2
; CHECK-NEXT:    vmovaps (%rdi), %xmm2
; CHECK-NEXT:    vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1)