Commit 71b63bed authored by Tom Stellard's avatar Tom Stellard
Browse files

Merging r260599:

------------------------------------------------------------------------
r260599 | thomas.stellard | 2016-02-11 13:45:07 -0800 (Thu, 11 Feb 2016) | 14 lines

AMDGPU/SI: Make sure MIMG descriptors and samplers stay in SGPRs

Summary:
It's possible to have resource descriptors and samplers stored in
VGPRs, either by a VMEM instruction or in the case of samplers,
floating-point calculations.  When this happens, we need to use
v_readfirstlane to copy these values back to sgprs.

Reviewers: mareko, arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D17102

------------------------------------------------------------------------

llvm-svn: 271642
parent 03e6aa5a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -687,5 +687,6 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
  let MIMG = 1;
  let Uses = [EXEC];

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0; // XXX ????
}
+42 −0
Original line number Diff line number Diff line
@@ -1968,6 +1968,32 @@ void SIInstrInfo::legalizeOperandsVOP3(
  }
}

unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
                                 MachineRegisterInfo &MRI) const {
  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
  unsigned DstReg = MRI.createVirtualRegister(SRC);
  unsigned SubRegs = VRC->getSize() / 4;

  SmallVector<unsigned, 8> SRegs;
  for (unsigned i = 0; i < SubRegs; ++i) {
    unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
            .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
    SRegs.push_back(SGPR);
  }

  MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
                                    UseMI->getDebugLoc(),
                                    get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addReg(SRegs[i]);
    MIB.addImm(RI.getSubRegFromChannel(i));
  }
  return DstReg;
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

@@ -2081,6 +2107,22 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
    return;
  }

  // Legalize MIMG
  if (isMIMG(*MI)) {
    MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
      SRsrc->setReg(SGPR);
    }

    MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
      SSamp->setReg(SGPR);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.
+7 −0
Original line number Diff line number Diff line
@@ -401,6 +401,13 @@ public:
  /// \brief Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
  /// be used when it is know that the value in SrcReg is same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
                          MachineRegisterInfo &MRI) const;

  /// \brief Legalize all operands in this instruction.  This function may
  /// create new instruction and insert them before \p MI.
  void legalizeOperands(MachineInstr *MI) const;
+18 −0
Original line number Diff line number Diff line
@@ -479,6 +479,24 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  switch (VRC->getSize()) {
  case 4:
    return &AMDGPU::SGPR_32RegClass;
  case 8:
    return &AMDGPU::SReg_64RegClass;
  case 16:
    return &AMDGPU::SReg_128RegClass;
  case 32:
    return &AMDGPU::SReg_256RegClass;
  case 64:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
+4 −0
Original line number Diff line number Diff line
@@ -89,6 +89,10 @@ public:
  const TargetRegisterClass *getEquivalentVGPRClass(
                                          const TargetRegisterClass *SRC) const;

  /// \returns A SGPR reg class with the same width as \p SRC
  const TargetRegisterClass *getEquivalentSGPRClass(
                                           const TargetRegisterClass *VRC) const;

  /// \returns The register class that is used for a sub-register of \p RC for
  /// the given \p SubIdx.  If \p SubIdx equals NoSubRegister, \p RC will
  /// be returned.
Loading