Loading llvm/lib/Target/X86/X86ISelLowering.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -41961,6 +41961,13 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, SubVec.getOpcode() == X86ISD::SUBV_BROADCAST)) return DAG.getNode(SubVec.getOpcode(), dl, OpVT, SubVec.getOperand(0)); // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x) if (SubVec == SubVec2 && SubVec.getOpcode() == ISD::SCALAR_TO_VECTOR && (Subtarget.hasAVX2() || (OpVT.getScalarSizeInBits() >= 32 && MayFoldLoad(SubVec.getOperand(0)))) && SubVec.getOperand(0).getValueType() == OpVT.getScalarType()) return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); // If we're inserting all zeros into the upper half, change this to // an insert into an all zeros vector. We will match this to a move // with implicit upper bit zeroing during isel. llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +2 −3 Original line number Diff line number Diff line Loading @@ -2718,10 +2718,9 @@ define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256: ; X64: # %bb.0: ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; X64-NEXT: vmovq (%rsi), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x16] ; X64-NEXT: # xmm2 = mem[0],zero ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01] ; X64-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e] ; X64-NEXT: # ymm1 {%k1} = mem[0,1,0,1,0,1,0,1] ; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01] ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01] ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] Loading llvm/test/CodeGen/X86/subvector-broadcast.ll +11 −39 Original line number Diff line number Diff line Loading @@ -1657,46 +1657,18 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(<2 x double>* %vp, <4 x double> } define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(<2 x float>* %vp, <8 x float> %default) { ; X32-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX1: # %bb.0: ; X32-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 ; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX1-NEXT: retl ; ; X32-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX2: # %bb.0: ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX2-NEXT: retl ; ; X32-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX512: # %bb.0: ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX512-NEXT: retl ; ; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX1-NEXT: retq ; ; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX2-NEXT: retq ; X32-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-NEXT: retl ; ; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX512-NEXT: retq ; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64: # %bb.0: ; X64-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef> %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default Loading Loading
llvm/lib/Target/X86/X86ISelLowering.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -41961,6 +41961,13 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, SubVec.getOpcode() == X86ISD::SUBV_BROADCAST)) return DAG.getNode(SubVec.getOpcode(), dl, OpVT, SubVec.getOperand(0)); // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x) if (SubVec == SubVec2 && SubVec.getOpcode() == ISD::SCALAR_TO_VECTOR && (Subtarget.hasAVX2() || (OpVT.getScalarSizeInBits() >= 32 && MayFoldLoad(SubVec.getOperand(0)))) && SubVec.getOperand(0).getValueType() == OpVT.getScalarType()) return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); // If we're inserting all zeros into the upper half, change this to // an insert into an all zeros vector. We will match this to a move // with implicit upper bit zeroing during isel.
llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +2 −3 Original line number Diff line number Diff line Loading @@ -2718,10 +2718,9 @@ define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256: ; X64: # %bb.0: ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; X64-NEXT: vmovq (%rsi), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x16] ; X64-NEXT: # xmm2 = mem[0],zero ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01] ; X64-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e] ; X64-NEXT: # ymm1 {%k1} = mem[0,1,0,1,0,1,0,1] ; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01] ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01] ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] Loading
llvm/test/CodeGen/X86/subvector-broadcast.ll +11 −39 Original line number Diff line number Diff line Loading @@ -1657,46 +1657,18 @@ define <4 x double> @broadcast_v4f64_v2f64_4u61(<2 x double>* %vp, <4 x double> } define <8 x float> @broadcast_v8f32_v2f32_u1uu0uEu(<2 x float>* %vp, <8 x float> %default) { ; X32-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX1: # %bb.0: ; X32-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 ; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX1-NEXT: retl ; ; X32-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX2: # %bb.0: ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX2-NEXT: retl ; ; X32-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32-AVX512: # %bb.0: ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-AVX512-NEXT: retl ; ; X64-AVX1-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 ; X64-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX1-NEXT: retq ; ; X64-AVX2-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX2-NEXT: retq ; X32-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: vbroadcastsd (%eax), %ymm1 ; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X32-NEXT: retl ; ; X64-AVX512-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-AVX512-NEXT: retq ; X64-LABEL: broadcast_v8f32_v2f32_u1uu0uEu: ; X64: # %bb.0: ; X64-NEXT: vbroadcastsd (%rdi), %ymm1 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; X64-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 0, i32 2, i32 3, i32 undef> %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default Loading