Commit 0489682e authored by Tom Stellard's avatar Tom Stellard
Browse files

Merging r360405:

------------------------------------------------------------------------
r360405 | maskray | 2019-05-09 22:51:00 -0700 (Thu, 09 May 2019) | 25 lines

[PPC64] Define getThunkSectionSpacing() based on the range of R_PPC64_REL24

Suggested by Sean Fertile and Peter Smith.

Thunk section spacing decreases the total number of thunks. I measured a
decrease of 1% or less in some large programs, with no perceivable
slowdown in link time. Override getThunkSectionSpacing() to enable it.
0x2000000 is the farthest point R_PPC64_REL24 can reach. I tried several
numbers and found 0x2000000 works the best. Numbers near 0x2000000 work
as well but let's just use the simpler number.

As demonstrated by the updated tests, this essentially changes placement
of most thunks to the end of the output section. We leverage this
property to fix PR40740 reported by Alfredo Dal'Ava Júnior:

The output section .init consists of input sections from several object
files (crti.o crtbegin.o crtend.o crtn.o). Sections other than the last
one do not have a terminator. With this patch, we create the thunk after
the last .init input section and thus fix the issue. This is not
foolproof but works quite well for such sections (with no terminator) in
practice.

Reviewed By: ruiu, sfertile

Differential Revision: https://reviews.llvm.org/D61720
------------------------------------------------------------------------

llvm-svn: 362274
parent f1cacab4
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -113,6 +113,7 @@ public:
  void writeGotHeader(uint8_t *Buf) const override;
  bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                  uint64_t BranchAddr, const Symbol &S) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
  RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
                          RelExpr Expr) const override;
@@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
  return !inBranchRange(Type, BranchAddr, S.getVA());
}

uint32_t PPC64::getThunkSectionSpacing() const {
  // Arch/ARM.cpp carries the detailed explanation of what
  // getThunkSectionSpacing() is for. On PPC64 the spacing is derived from
  // R_PPC64_REL24, the relocation used by unconditional branch instructions:
  //   (1 << (24 - 1)) * 4 == 0x2000000
  constexpr uint32_t FieldBits = 24;
  constexpr uint32_t Scale = 4;
  return (uint32_t{1} << (FieldBits - 1)) * Scale;
}

bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
  int64_t Offset = Dst - Src;
  if (Type == R_PPC64_REL14)
+2 −2
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ caller:
# CHECK-LABEL: caller
# CHECK:         bl .+44
# CHECK-NEXT:    mr 31, 3
# CHECK-NEXT:    bl .-48
# CHECK-NEXT:    bl .+44
# CHECK-NEXT:    ld 2, 24(1)
# CHECK-NEXT:    add 3, 3, 31
# CHECK-NEXT:    addi 1, 1, 32
@@ -63,6 +63,6 @@ caller:
# CHECK-EMPTY:
# CHECK-NEXT:  def:
# CHECK-NEXT:    addis 2, 12, 2
# CHECK-NEXT:    addi 2, 2, -32636
# CHECK-NEXT:    addi 2, 2, -32616
# CHECK-NEXT:    li 3, 55
# CHECK-NEXT:    blr
+6 −9
Original line number Diff line number Diff line
@@ -65,27 +65,24 @@ test:
# NEGOFFSET:  10010014:       bl .-33554432
# NEGOFFSET:  10010024:       b  .+33554432

# THUNK-LABEL: test:
# THUNK: 10010014:       bl .+20
# THUNK: 10010024:       b .+20

# .branch_lt[0]
# THUNK-LABEL: __long_branch_callee:
# THUNK-NEXT: 10010000:        addis 12, 2, -1
# THUNK-NEXT: 10010028:        addis 12, 2, -1
# THUNK-NEXT:                  ld 12, -32768(12)
# THUNK-NEXT:                  mtctr 12
# THUNK-NEXT:                  bctr

# .branch_lt[1]
# THUNK-LABEL: __long_branch_tail_callee:
# THUNK-NEXT: 10010010:        addis 12, 2, -1
# THUNK-NEXT: 10010038:        addis 12, 2, -1
# THUNK-NEXT:                  ld 12, -32760(12)
# THUNK-NEXT:                  mtctr 12
# THUNK-NEXT:                  bctr

# Each call now branches to a thunk, and although it is printed as positive
# the offset is interpreted as a signed 26 bit value so 67108812 is actually
# -52.
# THUNK-LABEL: test:
# THUNK: 10010034:       bl .-52
# THUNK: 10010044:       b .+67108812

# The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768.
#                Name             Type            Address          Off    Size
# BRANCHLT:     .branch_lt        PROGBITS        0000000010020000 020000 000010
+14 −14
Original line number Diff line number Diff line
@@ -15,11 +15,21 @@
# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s

# NM-DAG: 0000000010028000 d .TOC.
# NM-DAG: 0000000010010028 T ifunc
# NM-DAG: 000000001001002c T ifunc2
# NM-DAG: 0000000010010000 T ifunc
# NM-DAG: 0000000010010004 T ifunc2

# SECTIONS: .plt NOBITS 0000000010030000

# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16
# __plt_ifunc2 - . = 0x10010044 - 0x10010018 = 28
# CHECK: _start:
# CHECK-NEXT:                 addis 2, 12, 1
# CHECK-NEXT:                 addi 2, 2, 32760
# CHECK-NEXT: 10010010:       bl .+16
# CHECK-NEXT:                 ld 2, 24(1)
# CHECK-NEXT: 10010018:       bl .+28
# CHECK-NEXT:                 ld 2, 24(1)

# .plt[0] - .TOC. = 0x10030000 - 0x10028000 = (1<<16) - 32768
# CHECK: __plt_ifunc:
# CHECK-NEXT:     std 2, 24(1)
@@ -36,19 +46,9 @@
# CHECK-NEXT:     mtctr 12
# CHECK-NEXT:     bctr

# __plt_ifunc - . = 0x10010000 - 0x10010038 = -56
# __plt_ifunc2 - . = 0x10010014 - 0x10010040 = -44
# CHECK: _start:
# CHECK-NEXT:                 addis 2, 12, 1
# CHECK-NEXT:                 addi 2, 2, 32720
# CHECK-NEXT: 10010038:       bl .-56
# CHECK-NEXT:                 ld 2, 24(1)
# CHECK-NEXT: 10010040:       bl .-44
# CHECK-NEXT:                 ld 2, 24(1)

# Check that we emit 2 R_PPC64_IRELATIVE.
# DYNREL: R_PPC64_IRELATIVE       10010028
# DYNREL: R_PPC64_IRELATIVE       1001002c
# DYNREL: R_PPC64_IRELATIVE       10010000
# DYNREL: R_PPC64_IRELATIVE       10010004

.type ifunc STT_GNU_IFUNC
.globl ifunc
+1 −1
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ k:
// Dis:     test:
// Dis:        addis 3, 2, 0
// Dis-NEXT:   addi 3, 3, -32760
// Dis-NEXT:   bl .-60
// Dis-NEXT:   bl .+60
// Dis-NEXT:   ld 2, 24(1)
// Dis-NEXT:   addis 3, 3, 0
// Dis-NEXT:   lwa 3, -32768(3)
Loading