Commit 4b8e483f authored by Tom Stellard's avatar Tom Stellard
Browse files

Merging r266244:

------------------------------------------------------------------------
r266244 | thomas.stellard | 2016-04-13 13:44:16 -0700 (Wed, 13 Apr 2016) | 13 lines

AMDGPU/SI: Add support for spilling VGPRs without having to scavenge registers

Summary:
When we are spilling SGPRs to scratch memory, we usually don't have
free SGPRs to do the address calculation, so we need to re-use the
ScratchOffset register for the calculation.

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18917

------------------------------------------------------------------------

llvm-svn: 271765
parent 94b2adcf
Loading
Loading
Loading
Loading
+27 −10
Original line number Diff line number Diff line
@@ -227,29 +227,42 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset) const {
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffset;
  unsigned OriginalImmOffset = Offset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  if (!isUInt<12>(Offset + Size)) {
    SOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    SOffset = AMDGPU::NoRegister;

    // We don't have access to the register scavenger if this function is called
    // during  PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and since we are in the process of spilling
      // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
      // on SI/CI and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR.  Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to it's
      // original value.
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
      SOffset = ScratchOffset;
    } else {
      Scavenged = true;
    }
@@ -259,9 +272,6 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRS");

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
@@ -282,6 +292,13 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
      .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }

  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
            .addReg(ScratchOffset)
            .addImm(OriginalImmOffset);
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
@@ -432,7 +449,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
@@ -446,7 +463,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }
+2 −1
Original line number Diff line number Diff line
@@ -164,7 +164,8 @@ private:
  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
                             unsigned LoadStoreOp, unsigned Value,
                             unsigned ScratchRsrcReg, unsigned ScratchOffset,
                             int64_t Offset) const;
                             int64_t Offset,
                             RegScavenger *RS) const;
};

} // End namespace llvm