Commit 6b0eb5a6 authored by Fangrui Song's avatar Fangrui Song
Browse files

[ELF] Improve --gc-sections compatibility with GNU ld regarding section groups

Based on D70020 by serge-sans-paille.

The ELF spec says:

> Furthermore, there may be internal references among these sections that would not make sense if one of the sections were removed or replaced by a duplicate from another object. Therefore, such groups must be included or omitted from the linked object as a unit. A section cannot be a member of more than one group.

GNU ld has 2 behaviors that we don't have:

- Group members (nextInSectionGroup != nullptr) are subject to garbage collection.
  This includes non-SHF_ALLOC SHT_NOTE sections.
  In particular, discarding non-SHF_ALLOC SHT_NOTE sections is an expected behavior by the Annobin
  project. See
  https://developers.redhat.com/blog/2018/02/20/annobin-storing-information-binaries/
  for more information.
- Group members are retained or discarded as a unit.
  Members may have internal references that are not expressed as
  SHF_LINK_ORDER, relocations, etc. It seems that we should be more conservative here:
  if a section is marked live, mark all the other members within the
  group.

Both behaviors are reasonable. This patch implements them.

A new field InputSectionBase::nextInSectionGroup tracks the next member
within a group. On ELF64, this increases sizeof(InputSectionBase) from
144 to 152.

InputSectionBase::dependentSections tracks section dependencies, which
is used by both --gc-sections and /DISCARD/. We can't overload it for
the "next member" semantic, because we should allow /DISCARD/ to discard
sections independent of --gc-sections (GNU ld behavior). This behavior
may be reasonably used by `/DISCARD/ : { *(.ARM.exidx*) }` or `/DISCARD/
: { *(.note*) }` (new test `linkerscript/discard-group.s`).

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D70146
parent 3a76b8a5
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -507,6 +507,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
  this->sectionStringTable =
      CHECK(obj.getSectionStringTable(objSections), this);

  std::vector<ArrayRef<Elf_Word>> selectedGroups;

  for (size_t i = 0, e = objSections.size(); i < e; ++i) {
    if (this->sections[i] == &InputSection::discarded)
      continue;
@@ -564,6 +566,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
      if (isNew) {
        if (config->relocatable)
          this->sections[i] = createInputSection(sec);
        selectedGroups.push_back(entries);
        continue;
      }

@@ -588,6 +591,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
    }
  }

  // This block handles SHF_LINK_ORDER.
  for (size_t i = 0, e = objSections.size(); i < e; ++i) {
    if (this->sections[i] == &InputSection::discarded)
      continue;
@@ -610,6 +614,25 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
            " with SHF_LINK_ORDER should not refer a non-regular section: " +
            toString(linkSec));
  }

  // For each section group, connect its members in a circular singly-linked
  // list via nextInSectionGroup. See the comment in markLive().
  for (ArrayRef<Elf_Word> entries : selectedGroups) {
    InputSectionBase *head;
    InputSectionBase *prev = nullptr;
    for (uint32_t secIndex : entries.slice(1)) {
      InputSectionBase *s = this->sections[secIndex];
      if (!s || s == &InputSection::discarded)
        continue;
      if (prev)
        prev->nextInSectionGroup = s;
      else
        head = s;
      prev = s;
    }
    if (prev)
      prev->nextInSectionGroup = head;
  }
}

// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
+4 −0
Original line number Diff line number Diff line
@@ -142,6 +142,10 @@ public:
  // cases this points one level up.
  SectionBase *parent = nullptr;

  // The next member in the section group if this section is in a group. This is
  // used by --gc-sections.
  InputSectionBase *nextInSectionGroup = nullptr;

  template <class ELFT> ArrayRef<typename ELFT::Rel> rels() const {
    assert(!areRelocsRela);
    return llvm::makeArrayRef(
+15 −2
Original line number Diff line number Diff line
@@ -165,9 +165,11 @@ static bool isReserved(InputSectionBase *sec) {
  switch (sec->type) {
  case SHT_FINI_ARRAY:
  case SHT_INIT_ARRAY:
  case SHT_NOTE:
  case SHT_PREINIT_ARRAY:
    return true;
  case SHT_NOTE:
    // SHT_NOTE sections in a group are subject to garbage collection.
    return !sec->nextInSectionGroup;
  default:
    StringRef s = sec->name;
    return s.startswith(".ctors") || s.startswith(".dtors") ||
@@ -283,6 +285,10 @@ template <class ELFT> void MarkLive<ELFT>::mark() {

    for (InputSectionBase *isec : sec.dependentSections)
      enqueue(isec, 0);

    // Mark the next group member.
    if (sec.nextInSectionGroup)
      enqueue(sec.nextInSectionGroup, 0);
  }
}

@@ -353,12 +359,19 @@ template <class ELFT> void markLive() {
  // or -emit-reloc were given. And they are subject of garbage
  // collection because, if we remove a text section, we also
  // remove its relocation section.
  //
  // Note on nextInSectionGroup: The ELF spec says that group sections are
  // included or omitted as a unit. We take the interpretation that:
  //
  // - Group members (nextInSectionGroup != nullptr) are subject to garbage
  //   collection.
  // - Group members are retained or discarded as a unit.
  for (InputSectionBase *sec : inputSections) {
    bool isAlloc = (sec->flags & SHF_ALLOC);
    bool isLinkOrder = (sec->flags & SHF_LINK_ORDER);
    bool isRel = (sec->type == SHT_REL || sec->type == SHT_RELA);

    if (!isAlloc && !isLinkOrder && !isRel)
    if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup)
      sec->markLive();
  }

+54 −0
Original line number Diff line number Diff line
# REQUIRES: x86
## Check that group members are retained or discarded as a unit, and
## non-SHF_ALLOC sections in a group are subject to garbage collection.
## This is compatible with GNU ld.

# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: ld.lld --gc-sections %t.o -o %t.dead
# RUN: llvm-readobj -S %t.dead | FileCheck %s --check-prefix=CHECK-DEAD

## .mynote.bar is retained because it is not in a group.
# CHECK-DEAD-NOT: Name: .myanote.foo
# CHECK-DEAD-NOT: Name: .mytext.foo
# CHECK-DEAD-NOT: Name: .mybss.foo
# CHECK-DEAD-NOT: Name: .mynote.foo
# CHECK-DEAD:     Name: .mynote.bar

# RUN: ld.lld --gc-sections %t.o -o %t -e anote_foo
# RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=CHECK-LIVE
# RUN: ld.lld --gc-sections %t.o -o %t -e foo
# RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=CHECK-LIVE
# RUN: ld.lld --gc-sections %t.o -o %t -e bss_foo
# RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=CHECK-LIVE

## note_foo as the entry point does not make much sense because it is defined
## in a non-SHF_ALLOC section. This is just to demonstrate the behavior.
# RUN: ld.lld --gc-sections %t.o -o %t -e note_foo
# RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=CHECK-LIVE

# CHECK-LIVE: Name: .myanote.foo
# CHECK-LIVE: Name: .mytext.foo
# CHECK-LIVE: Name: .mybss.foo
# CHECK-LIVE: Name: .mynote.foo
# CHECK-LIVE: Name: .mynote.bar

.globl anote_foo, foo, bss_foo, note_foo

.section .myanote.foo,"aG",@note,foo,comdat
anote_foo:
.byte 0

.section .mytext.foo,"axG",@progbits,foo,comdat
foo:
.byte 0

.section .mybss.foo,"awG",@nobits,foo,comdat
bss_foo:
.byte 0

.section .mynote.foo,"G",@note,foo,comdat
note_foo:
.byte 0

.section .mynote.bar,"",@note
.byte 0
+33 −0
Original line number Diff line number Diff line
# REQUIRES: arm
## For --gc-sections, group members are retained or discarded as a unit.
## However, discarding a section via /DISCARD/ should not discard other members
## within the group. This is compatible with GNU ld.

# RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t.o

## We can discard .ARM.exidx* in a group.
# RUN: echo 'SECTIONS { /DISCARD/ : { *(.ARM.exidx*) }}' > %t.noarm.script
# RUN: ld.lld %t.o --gc-sections -T %t.noarm.script -o %t.noarm
# RUN: llvm-readobj -S %t.noarm | FileCheck %s --check-prefix=NOARM --implicit-check-not='Name: .ARM.exidx'

# NOARM: Name: .text
# NOARM: Name: .note._start

## Another example, we can discard SHT_NOTE in a group.
# RUN: echo 'SECTIONS { /DISCARD/ : { *(.note*) }}' > %t.nonote.script
# RUN: ld.lld %t.o --gc-sections -T %t.nonote.script -o %t.nonote
# RUN: llvm-readobj -S %t.nonote | FileCheck %s --check-prefix=NONOTE --implicit-check-not='Name: .note'

# NONOTE: Name: .ARM.exidx
# NONOTE: Name: .text

.section .text._start,"axG",%progbits,_start,comdat
.globl _start
_start:
.fnstart
.cantunwind
bx lr
.fnend

.section .note._start,"G",%note,_start,comdat
.byte 0