Commit c53c2058 authored by Sterling Augustine's avatar Sterling Augustine
Browse files

Cache uwnind frame headers as they are found.

Summary:
This improves unwind performance quite substantially, and follows
a somewhat similar approach used in libgcc_s as described in the
thread here:

https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html

On certain extremely exception heavy internal tests, the time
drops from about 80 minutes to about five minutes.

Subscribers: libcxx-commits

Tags: #libc

Differential Revision: https://reviews.llvm.org/D75954
parent 30804d0a
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -452,6 +452,11 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
    #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
  #endif

#include "FrameHeaderCache.hpp"

// There should be just one of these per process.
static FrameHeaderCache ProcessFrameHeaderCache;

static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
                               dl_iterate_cb_data *cbdata) {
  if (phdr->p_type == PT_LOAD) {
@@ -466,10 +471,13 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
  return false;
}

int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
                             void *data) {
  auto cbdata = static_cast<dl_iterate_cb_data *>(data);
  if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
    return 0;
  if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
    return 1;

  Elf_Addr image_base = calculateImageBase(pinfo);
  bool found_obj = false;
@@ -496,9 +504,11 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
    } else if (!found_obj) {
      found_obj = checkAddrInSegment(phdr, image_base, cbdata);
    }
    if (found_obj && found_hdr)
    if (found_obj && found_hdr) {
      ProcessFrameHeaderCache.add(cbdata->sects);
      return 1;
    }
  }
  cbdata->sects->dwarf_section_length = 0;
  return 0;
}
+149 −0
Original line number Diff line number Diff line
//===-FrameHeaderCache.hpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Cache the elf program headers necessary to unwind the stack more efficiently
// in the presence of many dsos.
//
//===----------------------------------------------------------------------===//

#ifndef __FRAMEHEADER_CACHE_HPP__
#define __FRAMEHEADER_CACHE_HPP__

#include "config.h"
#include <limits.h>

#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)                            \
  _LIBUNWIND_LOG(msg, __VA_ARGS__)
#else
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
#endif

// This cache should only be be used from within a dl_iterate_phdr callback.
// dl_iterate_phdr does the necessary synchronization to prevent problems
// with concurrent access via the libc load lock. Adding synchronization
// for other uses is possible, but not currently done.

class _LIBUNWIND_HIDDEN FrameHeaderCache {
  struct CacheEntry {
    uintptr_t LowPC() { return Info.dso_base; };
    uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
    UnwindInfoSections Info;
    CacheEntry *Next;
  };

  static const size_t kCacheEntryCount = 8;

  // Can't depend on the C++ standard library in libunwind, so use an array to
  // allocate the entries, and two linked lists for ordering unused and recently
  // used entries.  FIXME: Would the the extra memory for a doubly-linked list
  // be better than the runtime cost of traversing a very short singly-linked
  // list on a cache miss? The entries themselves are all small and consecutive,
  // so unlikely to cause page faults when following the pointers. The memory
  // spent on additional pointers could also be spent on more entries.

  CacheEntry Entries[kCacheEntryCount];
  CacheEntry *MostRecentlyUsed;
  CacheEntry *Unused;

  void resetCache() {
    _LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
    MostRecentlyUsed = nullptr;
    Unused = &Entries[0];
    for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
      Entries[i].Next = &Entries[i + 1];
    }
    Entries[kCacheEntryCount - 1].Next = nullptr;
  }

  bool cacheNeedsReset(dl_phdr_info *PInfo) {
    // C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
    // loading and unloading shared libraries. If these values change between
    // iterations of dl_iterate_phdr, then invalidate the cache.

    // These are static to avoid needing an initializer, and unsigned long long
    // because that is their type within the extended dl_phdr_info.  Initialize
    // these to something extremely unlikely to be found upon the first call to
    // dl_iterate_phdr.
    static unsigned long long LastAdds = ULLONG_MAX;
    static unsigned long long LastSubs = ULLONG_MAX;
    if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
      // Resetting the entire cache is a big hammer, but this path is rare--
      // usually just on the very first call, when the cache is empty anyway--so
      // added complexity doesn't buy much.
      LastAdds = PInfo->dlpi_adds;
      LastSubs = PInfo->dlpi_subs;
      resetCache();
      return true;
    }
    return false;
  }

public:
  bool find(dl_phdr_info *PInfo, size_t, void *data) {
    if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
      return false;

    auto *CBData = static_cast<dl_iterate_cb_data *>(data);
    CacheEntry *Current = MostRecentlyUsed;
    CacheEntry *Previous = nullptr;
    while (Current != nullptr) {
      _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
          "FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
          Current->LowPC(), Current->HighPC());
      if (Current->LowPC() <= CBData->targetAddr &&
          CBData->targetAddr < Current->HighPC()) {
        _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
            "FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
            Current->LowPC(), Current->HighPC());
        if (Previous) {
          // If there is no Previous, then Current is already the
          // MostRecentlyUsed, and no need to move it up.
          Previous->Next = Current->Next;
          Current->Next = MostRecentlyUsed;
          MostRecentlyUsed = Current;
        }
        *CBData->sects = Current->Info;
        return true;
      }
      Previous = Current;
      Current = Current->Next;
    }
    _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
                                      CBData->targetAddr);
    return false;
  }

  void add(const UnwindInfoSections *UIS) {
    CacheEntry *Current = nullptr;

    if (Unused != nullptr) {
      Current = Unused;
      Unused = Unused->Next;
    } else {
      Current = MostRecentlyUsed;
      CacheEntry *Previous = nullptr;
      while (Current->Next != nullptr) {
        Previous = Current;
        Current = Current->Next;
      }
      Previous->Next = nullptr;
      _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
                                        Current->LowPC(), Current->HighPC());
    }

    Current->Info = *UIS;
    Current->Next = MostRecentlyUsed;
    MostRecentlyUsed = Current;
    _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
                                      MostRecentlyUsed->LowPC(),
                                      MostRecentlyUsed->HighPC());
  }
};

#endif // __FRAMEHEADER_CACHE_HPP__
+82 −0
Original line number Diff line number Diff line
// The other libunwind tests don't test internal interfaces, so the include path
// is a little wonky.
#include "../src/config.h"

// Only run this test under supported configurations.
// This #if chain is ugly, but see the comments in AddressSpace.hpp for
// the reasoning.

#ifdef __APPLE__
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
int main() { return 0; }
#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
int main() { return 0; }
#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)

#include <link.h>
#include <stdio.h>

// This file defines several of the data structures needed here,
// and includes FrameHeaderCache.hpp as well.
#include "../src/AddressSpace.hpp"

#define kBaseAddr 0xFFF000
#define kDwarfSectionLength 0xFF

using namespace libunwind;

int main() {
  FrameHeaderCache FHC;
  struct dl_phdr_info PInfo;
  memset(&PInfo, 0, sizeof(PInfo));
  // The cache itself should only care about these two fields--they
  // tell the cache to invalidate or not; everything else is handled
  // by AddressSpace.hpp.
  PInfo.dlpi_adds = 6;
  PInfo.dlpi_subs = 7;

  UnwindInfoSections UIS;
  UIS.dso_base = kBaseAddr;
  UIS.dwarf_section_length = kDwarfSectionLength;
  dl_iterate_cb_data CBData;
  // Unused by the cache.
  CBData.addressSpace = nullptr;
  CBData.sects = &UIS;
  CBData.targetAddr = kBaseAddr + 1;

  // Nothing present, shouldn't find.
  if (FHC.find(&PInfo, 0, &CBData))
    abort();
  FHC.add(&UIS);
  // Just added. Should find.
  if (!FHC.find(&PInfo, 0, &CBData))
    abort();
  // Cache is invalid. Shouldn't find.
  PInfo.dlpi_adds++;
  if (FHC.find(&PInfo, 0, &CBData))
    abort();

  FHC.add(&UIS);
  CBData.targetAddr = kBaseAddr - 1;
  // Shouldn't find something outside of the addresses.
  if (FHC.find(&PInfo, 0, &CBData))
    abort();
  // Add enough things to the cache that the entry is evicted.
  for (int i = 0; i < 9; i++) {
    UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i);
    FHC.add(&UIS);
  }
  CBData.targetAddr = kBaseAddr;
  // Should have been evicted.
  if (FHC.find(&PInfo, 0, &CBData))
    abort();
  return 0;
}
#endif