Commit 15313f64 authored by Haowei Wu's avatar Haowei Wu
Browse files

[llvm-elfabi] Support ELF file that lacks .gnu.hash section

Before this change, when reading an ELF file, elfabi determined the number
of entries in .dynsym by reading the .gnu.hash section. This change makes
elfabi read the section headers first, which allows elfabi to work on ELF
files that do not have a .gnu.hash section.

Differential Revision: https://reviews.llvm.org/D93362
parent 4210b870
Loading
Loading
Loading
Loading
+95 −0
Original line number Diff line number Diff line
@@ -217,6 +217,8 @@ public:
  Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
                                             Elf_Shdr_Range Sections) const;

  /// Returns the number of entries in the dynamic symbol table. The count is
  /// taken from the SHT_DYNSYM section header when section headers are
  /// present; otherwise it is inferred from the table referenced by
  /// DT_GNU_HASH or, failing that, DT_HASH. Yields 0 when no dynamic symbol
  /// table can be located, and an error when the input cannot be parsed.
  Expected<uint64_t> getDynSymtabSize() const;

  StringRef getRelocationTypeName(uint32_t Type) const;
  void getRelocationTypeName(uint32_t Type,
                             SmallVectorImpl<char> &Result) const;
@@ -651,6 +653,99 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
  return getStringTable(Sections[Index], WarnHandler);
}

/// This function finds the number of dynamic symbols using a GNU hash table.
///
/// The largest value stored in any bucket is taken as the index of the first
/// symbol of the last chain; that chain is then walked until an entry with
/// its low bit set is found, which ends the walk.
///
/// @param Table The GNU hash table for .dynsym.
/// @param BufEnd One past the last readable byte of the buffer that holds
///        \p Table. Nothing at or beyond this address is read.
/// @return The inferred number of dynamic symbols, or a parse_failed error
///         when the table does not fit in the buffer or the last chain has no
///         terminator before the buffer ends.
template <class ELFT>
static Expected<uint64_t>
getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table,
                            const void *BufEnd) {
  using Elf_Word = typename ELFT::Word;
  const auto *End = static_cast<const uint8_t *>(BufEnd);
  // True when the Size bytes starting at Ptr all lie before End.
  auto InBounds = [End](const void *Ptr, uint64_t Size) {
    const auto *Begin = static_cast<const uint8_t *>(Ptr);
    return Begin <= End && static_cast<uint64_t>(End - Begin) >= Size;
  };

  // The header fields must be readable before any of them is trusted.
  if (!InBounds(&Table, sizeof(Table)))
    return createStringError(
        object_error::parse_failed,
        "GNU hash table header goes past the end of the buffer");
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The bucket array must be readable before it is scanned.
  auto Buckets = Table.buckets();
  if (!InBounds(Buckets.begin(),
                static_cast<uint64_t>(Table.nbuckets) * sizeof(Elf_Word)))
    return createStringError(
        object_error::parse_failed,
        "GNU hash table buckets go past the end of the buffer");

  uint64_t LastSymIdx = 0;
  // Find the index of the first symbol in the last chain.
  for (Elf_Word Val : Buckets)
    LastSymIdx = std::max(LastSymIdx, (uint64_t)Val);

  // If no bucket refers to a hashed symbol there is no chain to walk, and
  // values() must not be asked for a count below symndx.
  // NOTE(review): this reports only the symndx unhashed entries; confirm that
  // is the desired result for tables whose buckets are all empty.
  const uint64_t FirstHashedIdx = Table.symndx;
  if (LastSymIdx < FirstHashedIdx)
    return FirstHashedIdx;

  const Elf_Word *It =
      reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
  // Locate the end of the chain to find the last symbol index. A whole word
  // must be available before it is dereferenced.
  while (InBounds(It, sizeof(Elf_Word)) && (*It & 1) == 0) {
    ++LastSymIdx;
    ++It;
  }
  if (!InBounds(It, sizeof(Elf_Word))) {
    return createStringError(
        object_error::parse_failed,
        "no terminator found for GNU hash section before buffer end");
  }
  return LastSymIdx + 1;
}

/// This function determines the number of dynamic symbols. It reads section
/// headers first. If section headers are not available, the number of
/// symbols will be inferred by parsing dynamic hash tables.
///
/// @return The number of .dynsym entries; 0 when section headers exist but
///         contain no SHT_DYNSYM section, or when neither DT_GNU_HASH nor
///         DT_HASH is present. An error is returned when the section headers
///         or dynamic entries cannot be read, when the SHT_DYNSYM header has
///         an sh_entsize of 0 or one that does not divide sh_size, or when a
///         hash table cannot be mapped or parsed.
template <class ELFT>
Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
  // Read .dynsym section header first if available.
  Expected<Elf_Shdr_Range> SectionsOrError = sections();
  if (!SectionsOrError)
    return SectionsOrError.takeError();
  for (const Elf_Shdr &Sec : *SectionsOrError) {
    if (Sec.sh_type == ELF::SHT_DYNSYM) {
      // A zero entry size would make the modulo and the division below
      // divide by zero, so reject it up front.
      if (Sec.sh_entsize == 0) {
        return createStringError(object_error::parse_failed,
                                 "SHT_DYNSYM section has sh_entsize of 0");
      }
      if (Sec.sh_size % Sec.sh_entsize != 0) {
        return createStringError(object_error::parse_failed,
                                 "SHT_DYNSYM section has sh_size (" +
                                     Twine(Sec.sh_size) + ") % sh_entsize (" +
                                     Twine(Sec.sh_entsize) + ") that is not 0");
      }
      return Sec.sh_size / Sec.sh_entsize;
    }
  }

  if (!SectionsOrError->empty()) {
    // Section headers are available but .dynsym header is not found.
    // Return 0 as .dynsym does not exist.
    return 0;
  }

  // Section headers do not exist. Falling back to infer
  // upper bound of .dynsym from .gnu.hash and .hash.
  Expected<Elf_Dyn_Range> DynTable = dynamicEntries();
  if (!DynTable)
    return DynTable.takeError();
  llvm::Optional<uint64_t> ElfHash;
  llvm::Optional<uint64_t> ElfGnuHash;
  for (const Elf_Dyn &Entry : *DynTable) {
    switch (Entry.d_tag) {
    case ELF::DT_HASH:
      ElfHash = Entry.d_un.d_ptr;
      break;
    case ELF::DT_GNU_HASH:
      ElfGnuHash = Entry.d_un.d_ptr;
      break;
    }
  }
  // The GNU hash table is preferred when both tags are present.
  if (ElfGnuHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash);
    if (!TablePtr)
      return TablePtr.takeError();
    const Elf_GnuHash *Table =
        reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
    return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end());
  }

  // Search SYSV hash table to try to find the upper bound of dynsym.
  if (ElfHash) {
    Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash);
    if (!TablePtr)
      return TablePtr.takeError();
    // The header must lie entirely inside the buffer before nchain is read.
    const uint8_t *BufEnd = this->Buf.bytes_end();
    if (*TablePtr > BufEnd ||
        static_cast<uint64_t>(BufEnd - *TablePtr) < sizeof(Elf_Hash)) {
      return createStringError(
          object_error::parse_failed,
          "SYSV hash table header goes past the end of the buffer");
    }
    const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
    return Table->nchain;
  }
  return 0;
}

/// Constructs an ELFFile whose Buf member is initialized from \p Object.
template <class ELFT>
ELFFile<ELFT>::ELFFile(StringRef Object)
    : Buf(Object) {}

template <class ELFT>
+1 −57
Original line number Diff line number Diff line
@@ -440,62 +440,6 @@ static Error populateDynamic(DynamicEntries &Dyn,
  return Error::success();
}

/// Computes how many dynamic symbols a GNU hash table accounts for.
///
/// The walk is not bounded by any buffer limit: it continues until an entry
/// with its low bit set is read.
///
/// @param Table The GNU hash table for .dynsym.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;
  // With no buckets there is no chain to walk.
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The largest bucket value is the index of the first symbol in the last
  // chain.
  uint64_t ChainStart = 0;
  for (Elf_Word Bucket : Table.buckets())
    if (ChainStart < (uint64_t)Bucket)
      ChainStart = (uint64_t)Bucket;

  const Elf_Word *Chain =
      reinterpret_cast<const Elf_Word *>(Table.values(ChainStart).end());
  // Count the entries that precede the one whose low bit is set.
  uint64_t Walked = 0;
  while ((Chain[Walked] & 1) == 0)
    ++Walked;
  return ChainStart + Walked + 1;
}

/// Determines the number of dynamic symbols from the dynamic hash tables,
/// for use when section headers cannot be consulted.
///
/// The GNU hash table is tried first, then the SYSV hash table; 0 is
/// returned when neither location is recorded in \p Dyn.
///
/// @param Dyn Entries with the locations of hash tables.
/// @param ElfFile The ElfFile that the section contents reside in.
template <class ELFT>
static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
                                     const ELFFile<ELFT> &ElfFile) {
  // Prefer the GNU hash table: its last chain gives the upper bound of dynsym.
  if (Dyn.GnuHash) {
    Expected<const uint8_t *> GnuAddr = ElfFile.toMappedAddr(*Dyn.GnuHash);
    if (!GnuAddr)
      return GnuAddr.takeError();
    const auto *GnuTable =
        reinterpret_cast<const typename ELFT::GnuHash *>(GnuAddr.get());
    return getDynSymtabSize<ELFT>(*GnuTable);
  }

  // Without either table there is nothing to infer the count from.
  if (!Dyn.ElfHash)
    return 0;

  // Fall back to the SYSV hash table, whose nchain field is the upper bound.
  Expected<const uint8_t *> SysvAddr = ElfFile.toMappedAddr(*Dyn.ElfHash);
  if (!SysvAddr)
    return SysvAddr.takeError();
  const auto *SysvTable =
      reinterpret_cast<const typename ELFT::Hash *>(SysvAddr.get());
  return SysvTable->nchain;
}

/// This function extracts symbol type from a symbol's st_info member and
/// maps it to an ELFSymbolType enum.
/// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
@@ -637,7 +581,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
  }

  // Populate Symbols from .dynsym table and dynamic string table.
  Expected<uint64_t> SymCount = getNumSyms(DynEnt, ElfFile);
  Expected<uint64_t> SymCount = ElfFile.getDynSymtabSize();
  if (!SymCount)
    return SymCount.takeError();
  if (*SymCount > 0) {
+115 −0
Original line number Diff line number Diff line
## Test reading ELF with .dynsym under the following conditions:
##  * Section headers are available.
##  * Section headers are stripped but there is a DT_GNU_HASH dynamic tag.
##  * Section headers are stripped but there is a DT_HASH dynamic tag.

## Test if llvm-elfabi reads DT_SYMTAB size through section headers by putting the wrong terminator in DT_GNU_HASH.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00"
# RUN: llvm-elfabi --elf %tfull --emit-tbe=- | FileCheck %s

## Test if llvm-elfabi fails to read DT_SYMTAB size through section headers when the value of sh_entsize is invalid.
# RUN: yaml2obj %s -o %tfull -DGNUHASHVALUE="[0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DENTSIZE="0x19"
# RUN: not llvm-elfabi --elf %tfull --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=BADENTSIZE

## Test if llvm-elfabi reads DT_SYMTAB size through DT_GNU_HASH.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0x9]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- | FileCheck %s

## Test if llvm-elfabi fails to read DT_SYMTAB size through DT_GNU_HASH when there is no terminator.
# RUN: yaml2obj %s -o %tw.gnu.hash -DGNUHASHVALUE="[0x8, 0xA]" -DTAG1="DT_GNU_HASH" -DVAL1="0xC00" -DNOHEADER="true"
# RUN: not llvm-elfabi --elf %tw.gnu.hash --emit-tbe=- 2>&1 | FileCheck %s --check-prefix=NOTERMINATOR

# CHECK:      --- !tapi-tbe
# CHECK-NEXT: TbeVersion:      1.0
# CHECK-NEXT: Arch:            AArch64
# CHECK-NEXT: Symbols:
# CHECK-NEXT:  bar:             { Type: Object, Size: 0, Undefined: true }
# CHECK-NEXT:  foo:             { Type: Func, Undefined: true }
# CHECK-NEXT: ...

# BADENTSIZE: SHT_DYNSYM section has sh_size (72) % sh_entsize (25) that is not 0

# NOTERMINATOR: error: no terminator found for GNU hash section before buffer end

--- !ELF
FileHeader:
  Class:      ELFCLASS64
  Data:       ELFDATA2LSB
  Type:       ET_DYN
  Machine:    EM_AARCH64
Sections:
  - Name:         .text
    Type:         SHT_PROGBITS
  - Name:         .data
    Type:         SHT_PROGBITS
  - Name:         .strtab
    Type:         SHT_STRTAB
  - Name:         .shstrtab
    Type:         SHT_STRTAB
  - Name:         .dynsym
    Type:         SHT_DYNSYM
    Flags:        [ SHF_ALLOC ]
    EntSize:      [[ENTSIZE=0x18]]
    Address:      0x400
    AddressAlign: 0x400
  - Name:         .dynstr
    Type:         SHT_STRTAB
    Flags:        [ SHF_ALLOC ]
    Address:      0x600
    AddressAlign: 0x200
  - Name:         .dynamic
    Type:         SHT_DYNAMIC
    Flags:        [ SHF_ALLOC ]
    Address:      0x800
    AddressAlign: 0x200
    Entries:
      - Tag:   DT_STRTAB
        Value: 0x600
      - Tag:   DT_STRSZ
        Value: 9
      - Tag:   DT_SYMTAB
        Value: 0x400
      - Tag:   [[TAG1]]
        Value: [[VAL1]]
      - Tag:   DT_NULL
        Value: 0
  - Name:         .hash
    Type:         SHT_HASH
    Flags:        [ SHF_ALLOC ]
    Address:      0xA00
    AddressAlign: 0x200
    Bucket:       [ 1 ]
    Chain:        [ 1, 2, 3 ]
  - Name:         .gnu.hash
    Type:         SHT_GNU_HASH
    Flags:        [ SHF_ALLOC ]
    Address:      0xC00
    AddressAlign: 0x200
    Header:
      SymNdx:    0x1
      Shift2:    0x2
      MaskWords: 2
      NBuckets:  2
    BloomFilter: [0x3, 0x4]
    HashBuckets: [0x0, 0x1]
    HashValues:  [[GNUHASHVALUE]]
DynamicSymbols:
  - Name:    foo
    Type:    STT_FUNC
    Value:   0x100
    Binding: 1
  - Name:    bar
    Type:    STT_OBJECT
    Value:   0x200
    Binding: 1
ProgramHeaders:
  - Type:     PT_LOAD
    VAddr:    0x400
    FirstSec: .dynsym
    LastSec:  .gnu.hash
  - Type:     PT_DYNAMIC
    VAddr:    0x800
    FirstSec: .dynamic
    LastSec:  .dynamic
SectionHeaderTable:
  NoHeaders: [[NOHEADER=false]]