Commit 89010f1c authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r257925, r257929, r257930, and r257997:

------------------------------------------------------------------------
r257925 | mren | 2016-01-15 11:35:42 -0800 (Fri, 15 Jan 2016) | 10 lines

CXX_FAST_TLS calling convention: fix issue on X86-64.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

I will commit fixes to other platforms as well.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257929 | mren | 2016-01-15 12:13:28 -0800 (Fri, 15 Jan 2016) | 10 lines

CXX_FAST_TLS calling convention: fix issue on AArch64.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

I will commit fixes to other platforms as well.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257930 | mren | 2016-01-15 12:24:11 -0800 (Fri, 15 Jan 2016) | 8 lines

CXX_FAST_TLS calling convention: fix issue on ARM.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257997 | mren | 2016-01-16 08:39:46 -0800 (Sat, 16 Jan 2016) | 12 lines

CXX_FAST_TLS calling convention: fix issue on x86-64.

%RBP can't be handled explicitly. We generate the following code:
    pushq %rbp
    movq  %rsp, %rbp
    ...
    movq  %rbx, (%rbp)  ## 8-byte Spill
where %rbp will be overwritten by the spilled value.

The fix is to let PEI handle %RBP.
PR26136
------------------------------------------------------------------------

llvm-svn: 258162
parent bf4a0e1f
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (AArch64::GPR64RegClass.contains(*I))
@@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
            NewVR)
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
              *I)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
+5 −4
Original line number Diff line number Diff line
@@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (ARM::GPRRegClass.contains(*I))
@@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
            NewVR)
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
              *I)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
+2 −2
Original line number Diff line number Diff line
@@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
                                             R8, R9, R10, R11)>;

// CSRs that are handled by prologue, epilogue.
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;

// CSRs that are handled explicitly via copies.
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;

// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
+5 −4
Original line number Diff line number Diff line
@@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR(
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (X86::GR64RegClass.contains(*I))
@@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
            NewVR)
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);
    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
              *I)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
+27 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@__dso_handle = external global i8
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4

declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
@@ -74,3 +75,29 @@ __tls_init.exit:
; CHECK-NOT: ldp d27, d26
; CHECK-NOT: ldp d29, d28
; CHECK-NOT: ldp d31, d30

; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: stp d31, d30
; CHECK-NOT: stp d29, d28
; CHECK-NOT: stp d27, d26
; CHECK-NOT: stp d25, d24
; CHECK-NOT: stp d23, d22
; CHECK-NOT: stp d21, d20
; CHECK-NOT: stp d19, d18
; CHECK-NOT: stp d17, d16
; CHECK-NOT: stp d7, d6
; CHECK-NOT: stp d5, d4
; CHECK-NOT: stp d3, d2
; CHECK-NOT: stp d1, d0
; CHECK-NOT: stp x20, x19
; CHECK-NOT: stp x14, x13
; CHECK-NOT: stp x12, x11
; CHECK-NOT: stp x10, x9
; CHECK-NOT: stp x8, x7
; CHECK-NOT: stp x6, x5
; CHECK-NOT: stp x4, x3
; CHECK-NOT: stp x2, x1
; CHECK: blr
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
  ret i32* @sum1
}
Loading