Commit 8e5d7e51 authored by Tom Stellard
Browse files

Merging r344824:

------------------------------------------------------------------------
r344824 | ctopper | 2018-10-19 18:30:00 -0700 (Fri, 19 Oct 2018) | 14 lines

[X86] When checking the bits in cpu_features for function multiversioning dispatcher in the resolver, make sure all the required bits are set, not just one of them

Summary:
The multiversioning code repurposed the code from __builtin_cpu_supports for checking if a single feature is enabled. That code essentially performed (_cpu_features & (1 << C)) != 0. But with the multiversioning path, the mask is no longer guaranteed to be a power of 2, so the check returned true whenever any one of the bits in the mask was set, rather than only when all of them were set.

The correct check is (_cpu_features & mask) == mask

Reviewers: erichkeane, echristo

Reviewed By: echristo

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D53460
------------------------------------------------------------------------

llvm-svn: 344923
parent f2ee78cc
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -8952,9 +8952,9 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
      Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));

  // Check that every bit in the requested feature mask is set.
  Value *Bitset = Builder.CreateAnd(
      Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
  Value *Mask = Builder.getInt32(FeaturesMask);
  Value *Bitset = Builder.CreateAnd(Features, Mask);
  return Builder.CreateICmpEQ(Bitset, Mask);
}

Value *CodeGenFunction::EmitX86CpuInit() {
+16 −1
Original line number Diff line number Diff line
@@ -70,6 +70,22 @@ void bar4() {
// CHECK: ret void ()* @foo_decls.sse4.2
// CHECK: ret void ()* @foo_decls

// CHECK: define void @bar4()
// CHECK: call void @foo_multi.ifunc()

// CHECK: define void ()* @foo_multi.resolver() comdat
// CHECK: and i32 %{{.*}}, 4352
// CHECK: icmp eq i32 %{{.*}}, 4352
// CHECK: ret void ()* @foo_multi.fma4_sse4.2
// CHECK: icmp eq i32 %{{.*}}, 12
// CHECK: and i32 %{{.*}}, 4352
// CHECK: icmp eq i32 %{{.*}}, 4352
// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2
// CHECK: and i32 %{{.*}}, 768
// CHECK: icmp eq i32 %{{.*}}, 768
// CHECK: ret void ()* @foo_multi.avx_sse4.2
// CHECK: ret void ()* @foo_multi

// CHECK: declare i32 @foo.arch_sandybridge()

// CHECK: define available_externally i32 @foo_inline.sse4.2()
@@ -88,4 +104,3 @@ void bar4() {
// CHECK: define available_externally void @foo_multi.avx_sse4.2()
// CHECK: define available_externally void @foo_multi.fma4_sse4.2()
// CHECK: define available_externally void @foo_multi.arch_ivybridge_fma4_sse4.2()
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ int main() {

  // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 3, i32 0)
  // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256
  // CHECK: = icmp ne i32 [[AND]], 0
  // CHECK: = icmp eq i32 [[AND]], 256

  return 0;
}