Commit 2caa8716 authored by Reid Kleckner
Browse files

Merging r325049:

------------------------------------------------------------------------
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines

[X86] Use EDI for retpoline when no scratch regs are left

Summary:
Instead of solving the hard problem of how to pass the callee to the indirect
jump thunk without a register, just use a CSR. At a call boundary, there's
nothing stopping us from using a CSR to hold the callee as long as we save and
restore it in the prologue.

Also, add tests for this mregparm=3 case. I wrote execution tests for
__llvm_retpoline_push, but they never got committed as lit tests, either
because I never rewrote them or because they got lost in merge conflicts.

Reviewers: chandlerc, dwmw2

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43214
------------------------------------------------------------------------

llvm-svn: 325090
parent 940d56c1
Loading
Loading
Loading
Loading
+18 −32
Original line number Diff line number Diff line
@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
    // attempt to help out kernels and other systems where duplicating the
    // thunks is costly.
    switch (Reg) {
    case 0:
      assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
      return "__x86_indirect_thunk";
    case X86::EAX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_eax";
@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
    case X86::EDX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edx";
    case X86::EDI:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edi";
    case X86::R11:
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
  // When targeting an internal COMDAT thunk use an LLVM-specific name.
  switch (Reg) {
  case 0:
    assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
    return "__llvm_retpoline_push";
  case X86::EAX:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_eax";
@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
  case X86::EDX:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edx";
  case X86::EDI:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edi";
  case X86::R11:
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_retpoline_r11";
@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
  // just use R11, but we scan for uses anyway to ensure we don't generate
  // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
  // already a register use operand to the call to hold the callee. If none
  // are available, push the callee instead. This is less efficient, but is
  // necessary for functions using 3 regparms. Such function calls are
  // (currently) not eligible for tail call optimization, because there is no
  // scratch register available to hold the address of the callee.
  // are available, use EDI instead. EDI is chosen because EBX is the PIC base
  // register and ESI is the base pointer to realigned stack frames with VLAs.
  SmallVector<unsigned, 3> AvailableRegs;
  if (Subtarget.is64Bit())
    AvailableRegs.push_back(X86::R11);
  else
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
  // Zero out any registers that are already used.
  for (const auto &MO : MI.operands()) {
@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
      break;
    }
  }
  if (!AvailableReg)
    report_fatal_error("calling convention incompatible with retpoline, no "
                       "available registers");
  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
  if (AvailableReg == 0) {
    // No register available. Use PUSH. This must not be a tailcall, and this
    // must not be x64.
    if (Subtarget.is64Bit())
      report_fatal_error(
          "Cannot make an indirect call on x86-64 using both retpoline and a "
          "calling convention that preservers r11");
    if (Opc != X86::CALLpcrel32)
      report_fatal_error("Cannot make an indirect tail call on x86 using "
                         "retpoline without a preserved register");
    BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
    MI.getOperand(0).ChangeToES(Symbol);
    MI.setDesc(TII->get(Opc));
  } else {
  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
      .addReg(CalleeVReg);
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
  MachineInstrBuilder(*BB->getParent(), &MI)
      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
  }
  return BB;
}
+11 −31
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[]    = "__llvm_retpoline_eax";
static const char ECXThunkName[]    = "__llvm_retpoline_ecx";
static const char EDXThunkName[]    = "__llvm_retpoline_edx";
static const char PushThunkName[]   = "__llvm_retpoline_push";
static const char EDIThunkName[]    = "__llvm_retpoline_edi";

namespace {
class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
      createThunkFunction(M, R11ThunkName);
    else
      for (StringRef Name :
           {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
           {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
        createThunkFunction(M, Name);
    InsertedThunks = true;
    return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
    populateThunk(MF, X86::R11);
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that is used when
    // there are no scratch registers and assumes the retpoline target has
    // been pushed.
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    //   __llvm_retpoline_eax:
    //         calll .Leax_call_target
    //   .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
    //         movl %edx, (%esp)
    //         retl
    //
    // This last one is a bit more special and so needs a little extra
    // handling.
    // __llvm_retpoline_push:
    //         calll .Lpush_call_target
    // .Lpush_capture_spec:
    //         pause
    //         lfence
    //         jmp .Lpush_capture_spec
    // .align 16
    // .Lpush_call_target:
    //         # Clear pause_loop return address.
    //         addl $4, %esp
    //         # Top of stack words are: Callee, RA. Exchange Callee and RA.
    //         pushl 4(%esp)  # Push callee
    //         pushl 4(%esp)  # Push RA
    //         popl 8(%esp)   # Pop RA to final RA
    //         popl (%esp)    # Pop callee to next top of stack
    //         retl           # Ret to callee
    //   __llvm_retpoline_edi:
    //   ... # Same setup
    //         movl %edi, (%esp)
    //         retl
    if (MF.getName() == EAXThunkName)
      populateThunk(MF, X86::EAX);
    else if (MF.getName() == ECXThunkName)
      populateThunk(MF, X86::ECX);
    else if (MF.getName() == EDXThunkName)
      populateThunk(MF, X86::EDX);
    else if (MF.getName() == PushThunkName)
      populateThunk(MF);
    else if (MF.getName() == EDIThunkName)
      populateThunk(MF, X86::EDI);
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->setAlignment(4);
  if (Reg) {
  insertRegReturnAddrClobber(*CallTarget, *Reg);
  } else {
    assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
    insert32BitPushReturnAddrClobber(*CallTarget);
  }
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
+42 −0
Original line number Diff line number Diff line
; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s

; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
; because all three scratch registers (EAX, ECX, EDX) carry arguments, so none
; is available to hold the callee. The Linux kernel builds with -mregparm=3, so
; we need to support it. Tail call optimization (TCO) should fail because EDI
; must be restored after the call.

; Indirectly calls %fp with three zero arguments, each marked inreg. The
; expectations that follow this function show EAX, ECX and EDX being zeroed
; for those arguments and the callee being moved into EDI, which is saved and
; restored around the call. Attribute group #0 enables "+retpoline", and the
; call is expected to go through __llvm_retpoline_edi.
define void @call_edi(void (i32, i32, i32)* %fp) #0 {
entry:
  ; Written as a tail call, but the expected output is a regular calll
  ; followed by popl %edi and retl (see the file header note on TCO).
  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
  ret void
}

; CHECK-LABEL: call_edi:
;     EDI is used, so it must be saved.
; CHECK: pushl %edi
; CHECK-DAG: xorl %eax, %eax
; CHECK-DAG: xorl %edx, %edx
; CHECK-DAG: xorl %ecx, %ecx
; CHECK-DAG: movl {{.*}}, %edi
; CHECK: calll __llvm_retpoline_edi
; CHECK: popl %edi
; CHECK: retl

; Same body as @call_edi: an indirect call through %fp with three inreg zero
; arguments. The only difference is attribute group #1, which enables
; "+retpoline-external-thunk"; the expectations that follow this function
; therefore look for a call to __x86_indirect_thunk_edi rather than
; __llvm_retpoline_edi.
define void @edi_external(void (i32, i32, i32)* %fp) #1 {
entry:
  ; Written as a tail call, but the expected output is a regular calll
  ; followed by popl %edi and retl.
  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
  ret void
}

; CHECK-LABEL: edi_external:
; CHECK: pushl %edi
; CHECK-DAG: xorl %eax, %eax
; CHECK-DAG: xorl %edx, %edx
; CHECK-DAG: xorl %ecx, %ecx
; CHECK-DAG: movl {{.*}}, %edi
; CHECK: calll __x86_indirect_thunk_edi
; CHECK: popl %edi
; CHECK: retl

attributes #0 = { "target-features"="+retpoline" }
attributes #1 = { "target-features"="+retpoline-external-thunk" }
+5 −9
Original line number Diff line number Diff line
@@ -336,10 +336,10 @@ latch:
; X86-NEXT:          movl    %edx, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
; X86-NEXT:          .hidden __llvm_retpoline_push
; X86-NEXT:          .weak   __llvm_retpoline_push
; X86:       __llvm_retpoline_push:
; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT:          .hidden __llvm_retpoline_edi
; X86-NEXT:          .weak   __llvm_retpoline_edi
; X86:       __llvm_retpoline_edi:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
@@ -351,11 +351,7 @@ latch:
; X86-NEXT:          .p2align        4, 0x90
; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:          addl    $4, %esp
; X86-NEXT:          pushl   4(%esp)
; X86-NEXT:          pushl   4(%esp)
; X86-NEXT:          popl    8(%esp)
; X86-NEXT:          popl    (%esp)
; X86-NEXT:          movl    %edi, (%esp)
; X86-NEXT:          retl