llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (+4 −4)

@@ -40,8 +40,8 @@ static Constant *getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL) {
 /// each element's most significant bit (the sign bit).
 static Value *getBoolVecFromMask(Value *Mask, const DataLayout &DL) {
   // Fold Constant Mask.
-  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
-    return getNegativeIsTrueBoolVec(ConstantMask, DL);
+  if (isa<ConstantInt, ConstantFP, ConstantDataVector>(Mask))
+    return getNegativeIsTrueBoolVec(cast<Constant>(Mask), DL);
 
   // Mask was extended from a boolean vector.
   Value *ExtMask;
@@ -2973,9 +2973,9 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     }
 
     // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
-    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
+    if (isa<ConstantInt, ConstantFP, ConstantDataVector>(Mask)) {
       Constant *NewSelector =
-          getNegativeIsTrueBoolVec(ConstantMask, IC.getDataLayout());
+          getNegativeIsTrueBoolVec(cast<Constant>(Mask), IC.getDataLayout());
       return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
     }
     unsigned BitWidth = Mask->getType()->getScalarSizeInBits();
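Why the check widens (a minimal sketch, not taken from the patch itself): with the experimental -use-constant-int-for-fixed-length-splat / -use-constant-fp-for-fixed-length-splat options, a fixed-length vector splat such as splat (i8 -1) is represented as a single ConstantInt or ConstantFP of vector type rather than a ConstantDataVector, so the old dyn_cast<ConstantDataVector> silently skipped splat masks. The variadic isa<> form accepts all three representations. The helper name below is hypothetical; isa<>/cast<> are the real llvm/Support/Casting.h APIs.

    #include "llvm/IR/Constants.h"
    #include "llvm/Support/Casting.h"

    using namespace llvm;

    // Returns the mask as a Constant if it is in any of the three constant
    // forms the patch now recognizes, or nullptr otherwise.
    static Constant *getFoldableMask(Value *Mask) {
      // Variadic isa<> succeeds if Mask is any one of the listed classes.
      // ConstantInt/ConstantFP can carry a vector type when the fixed-length
      // splat options are enabled -- exactly the case the old
      // dyn_cast<ConstantDataVector> missed.
      if (isa<ConstantInt, ConstantFP, ConstantDataVector>(Mask))
        return cast<Constant>(Mask);
      return nullptr;
    }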
llvm/test/Transforms/InstCombine/X86/blend_x86.ll (+50 −1)

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -use-constant-int-for-fixed-length-splat=false -use-constant-fp-for-fixed-length-splat=false -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -use-constant-int-for-fixed-length-splat -use-constant-fp-for-fixed-length-splat -S | FileCheck %s
 
 define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
 ; CHECK-LABEL: @constant_blendvpd(
@@ -18,6 +19,14 @@ define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab)
   ret <2 x double> %1
 }
 
+define <2 x double> @constant_blendvpd_nzero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_nzero(
+; CHECK-NEXT:    ret <2 x double> [[AB:%.*]]
+;
+  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> splat (double -0.000000e+00))
+  ret <2 x double> %1
+}
+
 define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
 ; CHECK-LABEL: @constant_blendvpd_dup(
 ; CHECK-NEXT:    ret <2 x double> [[XY:%.*]]
@@ -43,6 +52,14 @@ define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd)
   ret <4 x float> %1
 }
 
+define <4 x float> @constant_blendvps_nzero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_nzero(
+; CHECK-NEXT:    ret <4 x float> [[ABCD:%.*]]
+;
+  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> splat (float -0.000000e+00))
+  ret <4 x float> %1
+}
+
 define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
 ; CHECK-LABEL: @constant_blendvps_dup(
 ; CHECK-NEXT:    ret <4 x float> [[XYZW:%.*]]
@@ -68,6 +85,14 @@ define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
   ret <16 x i8> %1
 }
 
+define <16 x i8> @constant_pblendvb_all_ones(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_all_ones(
+; CHECK-NEXT:    ret <16 x i8> [[ABCD:%.*]]
+;
+  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> splat (i8 -1))
+  ret <16 x i8> %1
+}
+
 define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
 ; CHECK-LABEL: @constant_pblendvb_dup(
 ; CHECK-NEXT:    ret <16 x i8> [[XYZW:%.*]]
@@ -93,6 +118,14 @@ define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %
   ret <4 x double> %1
 }
 
+define <4 x double> @constant_blendvpd_avx_nzero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_nzero(
+; CHECK-NEXT:    ret <4 x double> [[AB:%.*]]
+;
+  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> splat (double -0.000000e+00))
+  ret <4 x double> %1
+}
+
 define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
 ; CHECK-LABEL: @constant_blendvpd_avx_dup(
 ; CHECK-NEXT:    ret <4 x double> [[XY:%.*]]
@@ -118,6 +151,14 @@ define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %a
   ret <8 x float> %1
 }
 
+define <8 x float> @constant_blendvps_avx_nzero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_nzero(
+; CHECK-NEXT:    ret <8 x float> [[ABCD:%.*]]
+;
+  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> splat (float -0.000000e+00))
+  ret <8 x float> %1
+}
+
 define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
 ; CHECK-LABEL: @constant_blendvps_avx_dup(
 ; CHECK-NEXT:    ret <8 x float> [[XYZW:%.*]]
@@ -147,6 +188,14 @@ define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd)
   ret <32 x i8> %1
 }
 
+define <32 x i8> @constant_pblendvb_avx2_all_ones(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_all_ones(
+; CHECK-NEXT:    ret <32 x i8> [[ABCD:%.*]]
+;
+  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> splat (i8 -1))
+  ret <32 x i8> %1
+}
+
 define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
 ; CHECK-LABEL: @constant_pblendvb_avx2_dup(
 ; CHECK-NEXT:    ret <32 x i8> [[XYZW:%.*]]
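For reference, a scalar model (my sketch, not code from the patch) of the lane semantics these blendv tests pin down: each result lane takes the second source operand when the most significant bit of the corresponding mask lane is set, and the first otherwise. That is why an all-sign-bits mask (splat -0.0 for floats, splat i8 -1 for pblendvb) folds to the second operand, while the existing _zero variants fold to the first.

    #include <cstdint>

    // Scalar model of one pblendvb lane: the mask's top bit selects the
    // second source, matching the "negative is true" folds checked above.
    static uint8_t blendvLane(uint8_t A, uint8_t B, uint8_t MaskLane) {
      return (MaskLane & 0x80) ? B : A;
    }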
llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll (+19 −1)

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -use-constant-int-for-fixed-length-splat=false -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -use-constant-int-for-fixed-length-splat -S | FileCheck %s
 
 ;; MASKED LOADS
 
@@ -48,6 +49,14 @@ define <4 x float> @mload_fake_ones(ptr %f) {
   ret <4 x float> %ld
 }
 
+define <4 x float> @mload_fake_ones_splat(ptr %f) {
+; CHECK-LABEL: @mload_fake_ones_splat(
+; CHECK-NEXT:    ret <4 x float> zeroinitializer
+;
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> splat(i32 1))
+  ret <4 x float> %ld
+}
+
 ; All mask bits are set, so this is just a vector load.
 
 define <4 x float> @mload_real_ones(ptr %f) {
@@ -59,6 +68,15 @@ define <4 x float> @mload_real_ones(ptr %f) {
   ret <4 x float> %ld
 }
 
+define <4 x float> @mload_real_ones_splat(ptr %f) {
+; CHECK-LABEL: @mload_real_ones_splat(
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x float>, ptr [[F:%.*]], align 1
+; CHECK-NEXT:    ret <4 x float> [[UNMASKEDLOAD]]
+;
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> splat(i32 -1))
+  ret <4 x float> %ld
+}
+
 ; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
 
 define <4 x float> @mload_one_one(ptr %f) {
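Likewise for the maskload tests, a scalar model under the same caveat (a sketch, not LLVM code): an AVX maskload reads a lane only when the sign bit of its i32 mask element is set, and that lane reads as 0.0 otherwise. So splat(i32 1) has no sign bits set and folds to zeroinitializer, while splat(i32 -1) sets every sign bit and folds to a plain vector load, as the CHECK lines above record.

    #include <cstdint>

    // Scalar model of one AVX maskload.ps lane: only a negative
    // (sign-bit-set) mask element performs the load.
    static float maskloadLane(const float *Ptr, unsigned Lane, int32_t MaskElt) {
      return MaskElt < 0 ? Ptr[Lane] : 0.0f;
    }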