Commit 8e9cc63b authored by Marek Olsak's avatar Marek Olsak
Browse files

AMDGPU/SI: Add s_waitcnt at the end of non-void functions

Summary:
v2: Make ReturnsVoid private, so that I can add another 8 lines of code and
    look more productive.

Reviewers: tstellarAMD, arsenm

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D16034

llvm-svn: 257622
parent 8a0f335a
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -893,6 +893,8 @@ SDValue SITargetLowering::LowerReturn(SDValue Chain,
    return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
                                             OutVals, DL, DAG);

  Info->setIfReturnsVoid(Outs.size() == 0);

  SmallVector<ISD::OutputArg, 48> Splits;
  SmallVector<SDValue, 48> SplitVals;

+7 −1
Original line number Diff line number Diff line
@@ -84,6 +84,9 @@ private:

  bool LastInstWritesM0;

  /// \brief Whether the machine function returns void
  bool ReturnsVoid;

  /// \brief Get increment/decrement amount for this instruction.
  Counters getHwCounts(MachineInstr &MI);

@@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
                               const Counters &Required) {

  // End of program? No need to wait on anything
  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
  // A function not returning void needs to wait, because other bytecode will
  // be appended after it and we don't know what it will be.
  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
    return false;

  // Figure out if the async instructions execute in order
@@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
  LastIssued = ZeroCounts;
  LastOpcodeType = OTHER;
  LastInstWritesM0 = false;
  ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();

  memset(&UsedRegs, 0, sizeof(UsedRegs));
  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+1 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
+9 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {

  // Graphics info.
  unsigned PSInputAddr;
  bool ReturnsVoid;

public:
  // FIXME: Make private
@@ -288,6 +289,14 @@ public:
    PSInputAddr |= 1 << Index;
  }

  /// \brief Read back the stored ReturnsVoid flag.
  ///
  /// \returns the current value of the ReturnsVoid member.
  bool returnsVoid() const { return ReturnsVoid; }

  /// \brief Overwrite the stored ReturnsVoid flag.
  ///
  /// \param Value the new value assigned to the ReturnsVoid member.
  void setIfReturnsVoid(bool Value) { ReturnsVoid = Value; }

  unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};