Unverified Commit 34a80c90 authored by JP Hafer's avatar JP Hafer Committed by GitHub
Browse files

[InstCombine] Add user-count bailout to isAllocSiteRemovable (#190347)

isAllocSiteRemovable() walks all transitive users of an alloc site, but
sites with many users are almost never removable. Profiling on
real-world codegen workloads (73,943 alloc sites) showed:

- 89 removable sites, max 1,392 users walked
- 73,854 non-removable sites, avg 31,305 users walked
- 2.31B total wasted user visits (~400s wall-clock on a 35-min build)

Skip the removability analysis when direct user count exceeds a
configurable threshold (default 2048, tunable via hidden cl::opt
-instcombine-max-allocsite-removable-users).

Also defer WeakTrackingVH conversion: collect into Instruction* first
and convert only when the site is actually removable.
parent 0ec4b220
Loading
Loading
Loading
Loading
+15 −3
Original line number Diff line number Diff line
@@ -144,6 +144,11 @@ static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

static cl::opt<unsigned> MaxAllocSiteRemovableUsers(
    "instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048),
    cl::desc("Maximum number of users to visit in alloc-site "
             "removability analysis"));

namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // end namespace llvm
@@ -3733,7 +3738,7 @@ static bool isRemovableWrite(CallBase &CB, Value *UsedV,
}

static std::optional<ModRefInfo>
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<Instruction *> &Users,
                     const TargetLibraryInfo &TLI, bool KnowInit) {
  SmallVector<Instruction*, 4> Worklist;
  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
@@ -3744,6 +3749,8 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
    Instruction *PI = Worklist.pop_back_val();
    for (User *U : PI->users()) {
      Instruction *I = cast<Instruction>(U);
      if (Users.size() >= MaxAllocSiteRemovableUsers)
        return std::nullopt;
      switch (I->getOpcode()) {
      default:
        // Give up the moment we see something we can't handle.
@@ -3893,7 +3900,11 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
  // outputs of a program (when we convert a malloc to an alloca, the fact that
  // the allocation is now on the stack is potentially visible, for example),
  // but we believe in a permissible manner.
  SmallVector<WeakTrackingVH, 64> Users;
  //
  // Collect into Instruction* first to avoid expensive WeakTrackingVH
  // register/unregister overhead; convert to WeakTrackingVH only when the
  // site is actually removable.
  SmallVector<Instruction *, 64> RawUsers;

  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  // before each store.
@@ -3924,8 +3935,9 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
    KnowInitUndef = false;

  auto Removable =
      isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef);
      isAllocSiteRemovable(&MI, RawUsers, TLI, KnowInitZero | KnowInitUndef);
  if (Removable) {
    SmallVector<WeakTrackingVH, 64> Users(RawUsers.begin(), RawUsers.end());
    for (WeakTrackingVH &User : Users) {
      // Lowering all @llvm.objectsize and MTI calls first because they may use
      // a bitcast/GEP of the alloca we are removing.
+31 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s --check-prefix=OPT
; RUN: opt -passes=instcombine -instcombine-max-allocsite-removable-users=1 -S < %s | FileCheck %s --check-prefix=NOOPT

; Test that alloca with few users is still removed by
; isAllocSiteRemovable (not blocked by user-count bailout).
; With a threshold of 1, the alloca is not removed.
define i32 @test_few_users_still_removed() {
; OPT-LABEL: @test_few_users_still_removed(
; OPT-NEXT:    ret i32 0
;
; NOOPT-LABEL: @test_few_users_still_removed(
; NOOPT-NEXT:    [[A:%.*]] = alloca [4 x i8], align 1
; NOOPT-NEXT:    [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 1
; NOOPT-NEXT:    [[V0:%.*]] = load i8, ptr [[A]], align 1
; NOOPT-NEXT:    [[V1:%.*]] = load i8, ptr [[P1]], align 1
; NOOPT-NEXT:    [[Z0:%.*]] = zext i8 [[V0]] to i32
; NOOPT-NEXT:    [[Z1:%.*]] = zext i8 [[V1]] to i32
; NOOPT-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[Z0]], [[Z1]]
; NOOPT-NEXT:    ret i32 [[SUM]]
;
  %a = alloca [4 x i8], align 1
  %p0 = getelementptr inbounds [4 x i8], ptr %a, i64 0, i64 0
  %p1 = getelementptr inbounds [4 x i8], ptr %a, i64 0, i64 1
  %v0 = load i8, ptr %p0, align 1
  %v1 = load i8, ptr %p1, align 1
  %z0 = zext i8 %v0 to i32
  %z1 = zext i8 %v1 to i32
  %sum = add i32 %z0, %z1
  ret i32 %sum
}