Commit b5585dd3 authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r261441, r261447, and r261546:

------------------------------------------------------------------------
r261441 | nemanjai | 2016-02-20 10:16:25 -0800 (Sat, 20 Feb 2016) | 12 lines

Fix for PR 26500

This patch corresponds to review:
http://reviews.llvm.org/D17294

It ensures that whatever block we are emitting the prologue/epilogue into, we
have the necessary scratch registers. It takes away the hard-coded register
numbers for use as scratch registers as registers that are guaranteed to be
available in the function prologue/epilogue are not guaranteed to be available
within the function body. Since we shrink-wrap, the prologue/epilogue may end
up in the function body.
------------------------------------------------------------------------

------------------------------------------------------------------------
r261447 | nemanjai | 2016-02-20 12:45:37 -0800 (Sat, 20 Feb 2016) | 6 lines

Fix the build bot break caused by rL261441.

The patch has a necessary call to a function inside an assert. Which is fine
when you have asserts turned on. Not so much when they're off. Sorry about
the regression.
------------------------------------------------------------------------

------------------------------------------------------------------------
r261546 | nemanjai | 2016-02-22 10:04:00 -0800 (Mon, 22 Feb 2016) | 6 lines

Fix for PR26690 take 2

This is what was meant to be in the initial commit to fix this bug. The
parens were missing. This commit also adds a test case for the bug and
has undergone full testing on PPC and X86.
------------------------------------------------------------------------

llvm-svn: 261572
parent 5bf5432b
Loading
Loading
Loading
Loading
+164 −41
Original line number Diff line number Diff line
@@ -556,16 +556,42 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
    }
}

bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
/*  This function will do the following:
    - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
      respectively (defaults recommended by the ABI) and return true
    - If MBB is not an entry block, initialize the register scavenger and look
      for available registers.
    - If the defaults (R0/R12) are available, return true
    - If TwoUniqueRegsRequired is set to true, it looks for two unique
      registers. Otherwise, look for a single available register.
      - If the required registers are found, set SR1 and SR2 and return true.
      - If the required registers are not found, set SR2 or both SR1 and SR2 to
        PPC::NoRegister and return false.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                           unsigned *ScratchRegister) const {
                                      bool TwoUniqueRegsRequired,
                                      unsigned *SR1,
                                      unsigned *SR2) const {
  RegScavenger RS;
  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (ScratchRegister)
    *ScratchRegister = R0;
  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 as the scratch register
  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;
@@ -573,8 +599,8 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
  RS.enterBasicBlock(MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must consider
    // all registers used within the block
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
@@ -585,34 +611,85 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
      RS.forward(MBBI);
  }

  if (!RS.isRegUsed(R0)) 
  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
                                  : &PPC::GPRCRegClass);
  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo =
      static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
  }

  // Make sure the register scavenger was able to find an available register
  // If not, use R0 but return false to indicate no register was available and
  // R0 must be used (as recommended by the ABI)
  if (Reg == 0)
  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1))
    return false;

  if (ScratchRegister)
    *ScratchRegister = Reg;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo =
      static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF, false);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned MaxAlign = MFI->getMaxAlignment();

  return IsLargeFrame && HasBP && MaxAlign > 1;
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false, nullptr);
  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true, nullptr);
  return findScratchRegister(TmpMBB, true);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
@@ -664,6 +741,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
@@ -701,8 +779,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  findScratchRegister(&MBB, false, &ScratchReg);
  assert(ScratchReg && "No scratch register!");
  // Using the same bool variable as below to supress compiler warnings.
  bool SingleScratchReg =
    findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
                        &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int LROffset = getReturnSaveOffset();

@@ -748,13 +832,30 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  assert((isPPC64 || !MustSaveCR) &&
         "Prologue CR saving supported only in 64-bit mode");

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
    // If only one or two CR fields are clobbered, it could be more
    // efficient to use mfocrf to selectively save just those fields.
    MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
      .addReg(TempReg, getKillRegState(true))
      .addImm(8)
      .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  assert((isPPC64 || MustSaveCRs.empty()) &&
         "Prologue CR saving supported only in 64-bit mode");

  if (!MustSaveCRs.empty()) { // will only occur for PPC64
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
    // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
    // If only one or two CR fields are clobbered, it could be more
    // efficient to use mfocrf to selectively save just those fields.
@@ -792,7 +893,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
      .addImm(LROffset)
      .addReg(SPReg);

  if (!MustSaveCRs.empty()) // will only occur for PPC64
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
    BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
      .addReg(TempReg, getKillRegState(true))
      .addImm(8)
@@ -811,6 +913,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
      .addReg(SPReg);
  }

  // This condition must be kept in sync with canUseAsPrologue.
  if (HasBP && MaxAlign > 1) {
    if (isPPC64)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
@@ -828,6 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
        .addReg(ScratchReg, RegState::Kill)
        .addImm(NegFrameSize);
    } else {
      assert(!SingleScratchReg && "Only a single scratch reg available");
      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
        .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
@@ -951,7 +1055,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && MustSaveCRs.empty())
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
@@ -1005,6 +1109,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
@@ -1031,8 +1136,13 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,

  int FPOffset = 0;

  findScratchRegister(&MBB, true, &ScratchReg);
  assert(ScratchReg && "No scratch register!");
  // Using the same bool variable as below to supress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    if (isSVR4ABI) {
@@ -1130,15 +1240,27 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
    }
  }

  assert((isPPC64 || !MustSaveCR) &&
         "Epilogue CR restoring supported only in 64-bit mode");

  // If we need to save both the LR and the CR and we only have one available
  // scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
      .addImm(8)
      .addReg(SPReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
      .addImm(LROffset)
      .addReg(SPReg);

  assert((isPPC64 || MustSaveCRs.empty()) &&
         "Epilogue CR restoring supported only in 64-bit mode");

  if (!MustSaveCRs.empty()) // will only occur for PPC64
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
      .addImm(8)
      .addReg(SPReg);
@@ -1160,7 +1282,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
      .addImm(BPOffset)
      .addReg(SPReg);

  if (!MustSaveCRs.empty()) // will only occur for PPC64
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
+24 −11
Original line number Diff line number Diff line
@@ -30,28 +30,41 @@ class PPCFrameLowering: public TargetFrameLowering {
  const unsigned BasePointerSaveOffset;

  /**
   * \brief Find a register that can be used in function prologue and epilogue
   * \brief Find register[s] that can be used in function prologue and epilogue
   *
   * Find a register that can be use as the scratch register in function
   * Find register[s] that can be use as scratch register[s] in function
   * prologue and epilogue to save various registers (Link Register, Base
   * Pointer, etc.). Prefer R0, if it is available. If it is not available,
   * then choose a different register.
   * Pointer, etc.). Prefer R0/R12, if available. Otherwise choose whatever
   * register[s] are available.
   *
   * This method will return true if an available register was found (including
   * R0). If no available registers are found, the method returns false and sets
   * ScratchRegister to R0, as per the recommendation in the ABI.
   * This method will return true if it is able to find enough unique scratch
   * registers (1 or 2 depending on the requirement). If it is unable to find
   * enough available registers in the block, it will return false and set
   * any passed output parameter that corresponds to a required unique register
   * to PPC::NoRegister.
   *
   * \param[in] MBB The machine basic block to find an available register for
   * \param[in] UseAtEnd Specify whether the scratch register will be used at
   *                     the end of the basic block (i.e., will the scratch
   *                     register kill a register defined in the basic block)
   * \param[out] ScratchRegister The scratch register to use
   * \return true if a scratch register was found. false of a scratch register
   *         was not found and R0 is being used as the default.
   * \param[in] TwoUniqueRegsRequired Specify whether this basic block will
   *                                  require two unique scratch registers.
   * \param[out] SR1 The scratch register to use
   * \param[out] SR2 The second scratch register. If this pointer is not null
   *                 the function will attempt to set it to an available
   *                 register regardless of whether there is a hard requirement
   *                 for two unique scratch registers.
   * \return true if the required number of registers was found.
   *         false if the required number of scratch register weren't available.
   *         If either output parameter refers to a required scratch register
   *         that isn't available, it will be set to an invalid value.
   */
  bool findScratchRegister(MachineBasicBlock *MBB,
                           bool UseAtEnd,
                           unsigned *ScratchRegister) const;
                           bool TwoUniqueRegsRequired = false,
                           unsigned *SR1 = nullptr,
                           unsigned *SR2 = nullptr) const;
  bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const;

public:
  PPCFrameLowering(const PPCSubtarget &STI);
+118 −0
Original line number Diff line number Diff line
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s

%struct.anon = type { %struct.anon.0, %struct.anon.1 }
%struct.anon.0 = type { i32 }
%struct.anon.1 = type { i32 }

@i = common global i32 0, align 4
@b = common global i32* null, align 8
@c = common global i32 0, align 4
@a = common global i32 0, align 4
@h = common global i32 0, align 4
@g = common global i32 0, align 4
@j = common global i32 0, align 4
@f = common global %struct.anon zeroinitializer, align 4
@d = common global i32 0, align 4
@e = common global i32 0, align 4

; Function Attrs: norecurse nounwind
define signext i32 @fn1(i32* nocapture %p1, i32 signext %p2, i32* nocapture %p3) {
entry:
  %0 = load i32, i32* @i, align 4, !tbaa !1
  %cond = icmp eq i32 %0, 8
  br i1 %cond, label %if.end16, label %while.cond.preheader

while.cond.preheader:                             ; preds = %entry
  %1 = load i32*, i32** @b, align 8, !tbaa !5
  %2 = load i32, i32* %1, align 4, !tbaa !1
  %tobool18 = icmp eq i32 %2, 0
  br i1 %tobool18, label %while.end, label %while.body.lr.ph

while.body.lr.ph:                                 ; preds = %while.cond.preheader
  %.pre = load i32, i32* @c, align 4, !tbaa !1
  br label %while.body

while.body:                                       ; preds = %while.body.backedge, %while.body.lr.ph
  switch i32 %.pre, label %while.body.backedge [
    i32 0, label %sw.bb1
    i32 8, label %sw.bb1
    i32 6, label %sw.bb1
    i32 24, label %while.cond.backedge
  ]

while.body.backedge:                              ; preds = %while.body, %while.cond.backedge
  br label %while.body

sw.bb1:                                           ; preds = %while.body, %while.body, %while.body
  store i32 2, i32* @a, align 4, !tbaa !1
  br label %while.cond.backedge

while.cond.backedge:                              ; preds = %while.body, %sw.bb1
  store i32 4, i32* @a, align 4, !tbaa !1
  %.pre19 = load i32, i32* %1, align 4, !tbaa !1
  %tobool = icmp eq i32 %.pre19, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body.backedge

while.end.loopexit:                               ; preds = %while.cond.backedge
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %while.cond.preheader
  %3 = load i32, i32* @h, align 4, !tbaa !1
  %mul = mul nsw i32 %0, %3
  %4 = load i32, i32* @g, align 4, !tbaa !1
  %mul4 = mul nsw i32 %mul, %4
  store i32 %mul4, i32* @j, align 4, !tbaa !1
  %5 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 0, i32 0), align 4, !tbaa !7
  %tobool5 = icmp eq i32 %5, 0
  br i1 %tobool5, label %if.end, label %if.then

if.then:                                          ; preds = %while.end
  %div = sdiv i32 %5, %mul
  store i32 %div, i32* @g, align 4, !tbaa !1
  br label %if.end

if.end:                                           ; preds = %while.end, %if.then
  %6 = phi i32 [ %4, %while.end ], [ %div, %if.then ]
  %7 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 1, i32 0), align 4, !tbaa !10
  %tobool7 = icmp ne i32 %7, 0
  %tobool8 = icmp ne i32 %mul4, 0
  %or.cond = and i1 %tobool7, %tobool8
  %tobool10 = icmp ne i32 %0, 0
  %or.cond17 = and i1 %or.cond, %tobool10
  br i1 %or.cond17, label %if.then11, label %if.end13

if.then11:                                        ; preds = %if.end
  store i32 %3, i32* @d, align 4, !tbaa !1
  %8 = load i32, i32* @e, align 4, !tbaa !1
  store i32 %8, i32* %p3, align 4, !tbaa !1
  %.pre20 = load i32, i32* @g, align 4, !tbaa !1
  br label %if.end13

if.end13:                                         ; preds = %if.then11, %if.end
  %9 = phi i32 [ %.pre20, %if.then11 ], [ %6, %if.end ]
  %tobool14 = icmp eq i32 %9, 0
  br i1 %tobool14, label %if.end16, label %if.then15

if.then15:                                        ; preds = %if.end13
  store i32 %p2, i32* %p1, align 4, !tbaa !1
  br label %if.end16

if.end16:                                         ; preds = %entry, %if.end13, %if.then15
  ret i32 2
}

; CHECK: mfcr {{[0-9]+}}

!llvm.ident = !{!0}

!0 = !{!"clang version 3.9.0 (trunk 261520)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"any pointer", !3, i64 0}
!7 = !{!8, !2, i64 0}
!8 = !{!"", !9, i64 0, !9, i64 4}
!9 = !{!"", !2, i64 0}
!10 = !{!8, !2, i64 4}