Loading llvm/lib/Target/X86/X86InstrAVX512.td +17 −13 Original line number Diff line number Diff line Loading @@ -12235,6 +12235,7 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, } } let ExeDomain = SSEPackedSingle in defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF avx512vl_f32_info, avx512vl_i16_info, Loading @@ -12243,6 +12244,7 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", // Truncate Float to BFloat16 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { let ExeDomain = SSEPackedSingle in { let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; Loading @@ -12256,6 +12258,8 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, X86cvtneps2bf16, X86cvtneps2bf16, sched.YMM, "{1to8}", "{y}">, EVEX_V256; } } // Predicates = [HasBF16, HasVLX] } // ExeDomain = SSEPackedSingle def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, Loading @@ -12270,7 +12274,6 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; } } defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", SchedWriteCvtPD2PS>, T8XS, Loading Loading @@ -12352,6 +12355,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, } } let ExeDomain = SSEPackedSingle in defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, avx512vl_f32_info, avx512vl_i32_info, HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll +6 −6 Original line number Diff line number Diff line Loading @@ -32,9 +32,9 @@ define <32 x i16> @stack_fold_cvtne2ps2bf16_mask(<16 x float> %a0, <16 x float> ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 ; CHECK-NEXT: vmovaps (%rdi), %zmm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1) Loading Loading @@ -194,9 +194,9 @@ define <16 x i16> @stack_fold_cvtne2ps2bf16_mask_ymm(<8 x float> %a0, <8 x float ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 ; CHECK-NEXT: vmovaps (%rdi), %ymm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1) Loading Loading @@ -361,9 +361,9 @@ define <8 x i16> @stack_fold_cvtne2ps2bf16_mask_xmm(<4 x float> %a0, <4 x float> ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 ; CHECK-NEXT: vmovaps (%rdi), %xmm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1) Loading Loading
llvm/lib/Target/X86/X86InstrAVX512.td +17 −13 Original line number Diff line number Diff line Loading @@ -12235,6 +12235,7 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, } } let ExeDomain = SSEPackedSingle in defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF avx512vl_f32_info, avx512vl_i16_info, Loading @@ -12243,6 +12244,7 @@ defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", // Truncate Float to BFloat16 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { let ExeDomain = SSEPackedSingle in { let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; Loading @@ -12256,6 +12258,8 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, X86cvtneps2bf16, X86cvtneps2bf16, sched.YMM, "{1to8}", "{y}">, EVEX_V256; } } // Predicates = [HasBF16, HasVLX] } // ExeDomain = SSEPackedSingle def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, Loading @@ -12270,7 +12274,6 @@ multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">; } } defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", SchedWriteCvtPD2PS>, T8XS, Loading Loading @@ -12352,6 +12355,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, } } let ExeDomain = SSEPackedSingle in defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, avx512vl_f32_info, avx512vl_i32_info, HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll +6 −6 Original line number Diff line number Diff line Loading @@ -32,9 +32,9 @@ define <32 x i16> @stack_fold_cvtne2ps2bf16_mask(<16 x float> %a0, <16 x float> ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 ; CHECK-NEXT: vmovaps (%rdi), %zmm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1) Loading Loading @@ -194,9 +194,9 @@ define <16 x i16> @stack_fold_cvtne2ps2bf16_mask_ymm(<8 x float> %a0, <8 x float ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 ; CHECK-NEXT: vmovaps (%rdi), %ymm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 {%k1} # 32-byte Folded Reload ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1) Loading Loading @@ -361,9 +361,9 @@ define <8 x i16> @stack_fold_cvtne2ps2bf16_mask_xmm(<4 x float> %a0, <4 x float> ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2 ; CHECK-NEXT: vmovaps (%rdi), %xmm2 ; CHECK-NEXT: vcvtne2ps2bf16 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm2 {%k1} # 16-byte Folded Reload ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1) Loading