Commit 50c57655 authored by Tom Stellard's avatar Tom Stellard
Browse files

Merging r262732:

------------------------------------------------------------------------
r262732 | thomas.stellard | 2016-03-04 10:31:18 -0800 (Fri, 04 Mar 2016) | 12 lines

AMDGPU/SI: Add support for spiling SGPRs to scratch buffer

Summary:
This is necessary for when we run out of VGPRs and can no
longer use v_{read,write}_lane for spilling SGPRs.

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D17592

------------------------------------------------------------------------

llvm-svn: 271722
parent 8d375ce0
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -589,6 +589,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
    .addFrameIndex(FrameIndex)        // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addImm(0)                              // offset
    .addMemOperand(MMO);
}

@@ -671,6 +672,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
    .addFrameIndex(FrameIndex)        // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addImm(0)                              // offset
    .addMemOperand(MMO);
}

+5 −6
Original line number Diff line number Diff line
@@ -2006,18 +2006,17 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
    def _SAVE : InstSI <
      (outs),
      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
           SReg_32:$scratch_offset),
      "", []
    > {
           SReg_32:$scratch_offset, i32imm:$offset),
      "", []> {
      let mayStore = 1;
      let mayLoad = 0;
    }

    def _RESTORE : InstSI <
      (outs vgpr_class:$dst),
      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
      "", []
    > {
      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
           i32imm:$offset),
      "", []> {
      let mayStore = 0;
      let mayLoad = 1;
    }
+5 −10
Original line number Diff line number Diff line
@@ -162,7 +162,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                       MachineFunction *MF,
                                                       unsigned FrameIndex,
                                                       unsigned SubIdx) {
  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -173,19 +173,15 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister) {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("Ran out of VGPRs for spilling SGPR");
    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

      // When compiling from inside Mesa, the compilation continues.
      // Select an arbitrary register to avoid triggering assertions
      // during subsequent passes.
      LaneVGPR = AMDGPU::VGPR0;
    }

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

@@ -198,7 +194,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  Spill.Lane = Lane;
  return Spill;
}

+2 −1
Original line number Diff line number Diff line
@@ -113,8 +113,9 @@ public:
    unsigned VGPR;
    int Lane;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
    SpilledReg() : VGPR(0), Lane(-1) { }
    SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != AMDGPU::NoRegister;}
  };

  // SIMachineFunctionInfo definition
+69 −17
Original line number Diff line number Diff line
@@ -307,6 +307,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
@@ -314,6 +315,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
                  Spill.VGPR)
@@ -323,6 +325,27 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
          // FIXME: Since this spills to another register instead of an actual
          // frame index, we should delete the frame index when all references to
          // it are fixed.
        } else {
          // Spill SGPR to a frame index.
          // FIXME we should use S_STORE_DWORD here for VI.
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
                  .addReg(SubReg);

          unsigned Size = FrameInfo->getObjectSize(Index);
          unsigned Align = FrameInfo->getObjectAlignment(Index);
          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);
          MachineMemOperand *MMO
              = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                         Size, Align);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
                  .addReg(TmpReg)                         // src
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
        }
      }
      MI->eraseFromParent();
      break;
@@ -335,6 +358,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
        unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
@@ -342,12 +366,38 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
        struct SIMachineFunctionInfo::SpilledReg Spill =
            MFI->getSpilledReg(MF, Index, i);

        if (Spill.hasReg()) {
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                  SubReg)
                  .addReg(Spill.VGPR)
                  .addImm(Spill.Lane)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        } else {
          // Restore SGPR from a stack slot.
          // FIXME: We should use S_LOAD_DWORD here for VI.

          unsigned Align = FrameInfo->getObjectAlignment(Index);
          unsigned Size = FrameInfo->getObjectSize(Index);

          MachinePointerInfo PtrInfo
              = MachinePointerInfo::getFixedStack(*MF, Index);

          MachineMemOperand *MMO = MF->getMachineMemOperand(
              PtrInfo, MachineMemOperand::MOLoad, Size, Align);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
                  .addFrameIndex(Index)                   // frame_idx
                  .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
                  .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
                  .addImm(i * 4)                          // offset
                  .addMemOperand(MMO);
          BuildMI(*MBB, MI, DL,
                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
                  .addReg(TmpReg)
                  .addImm(0)
                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
        }
      }

      // TODO: only do this when it is needed
@@ -381,7 +431,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index));
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
      MI->eraseFromParent();
      break;
    case AMDGPU::SI_SPILL_V32_RESTORE:
@@ -394,7 +445,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
            FrameInfo->getObjectOffset(Index));
            FrameInfo->getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
      MI->eraseFromParent();
      break;
    }
Loading