Commit c83e92e0 authored by Tom Stellard's avatar Tom Stellard
Browse files

R600/SI: Define a schedule model and enable the generic machine scheduler

The schedule model is not complete yet, and could be improved.

This is a partial merge of r227461.  The difference is that it
does not enable the machine scheduler by default.

llvm-svn: 227596
parent 0ac34190
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

@@ -120,3 +121,21 @@ bool AMDGPUSubtarget::isVGPRSpillingEnabled(
                                       const SIMachineFunctionInfo *MFI) const {
  return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
}

void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                          MachineInstr *begin,
                                          MachineInstr *end,
                                          unsigned NumRegionInstrs) const {
  if (getGeneration() >= SOUTHERN_ISLANDS) {

    // Track register pressure so the scheduler can try to decrease
    // pressure once register usage is above the threshold defined by
    // SIRegisterInfo::getRegPressureSetLimit()
    Policy.ShouldTrackPressure = true;

    // Enabling both top down and bottom up scheduling seems to give us less
    // register spills than just using one of these approaches on its own.
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = false;
  }
}
+12 −0
Original line number Diff line number Diff line
@@ -209,6 +209,10 @@ public:
    return getGeneration() <= NORTHERN_ISLANDS;
  }

  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           MachineInstr *begin, MachineInstr *end,
                           unsigned NumRegionInstrs) const override;

  // Helper functions to simplify if statements
  bool isTargetELF() const {
    return false;
@@ -228,6 +232,14 @@ public:
    return TargetTriple.getOS() == Triple::AMDHSA;
  }
  bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;

  unsigned getMaxWavesPerCU() const {
    if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
      return 10;

    // FIXME: Not sure what this is for other subtagets.
    llvm_unreachable("do not know max waves per CU for this subtarget.");
  }
};

} // End namespace llvm
+52 −3
Original line number Diff line number Diff line
@@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  return Reserved;
}

unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                             MachineFunction &MF) const {
  return RC->getNumRegs();
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {

  // FIXME: We should adjust the max number of waves based on LDS size.
  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
  unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());

  for (regclass_iterator I = regclass_begin(), E = regclass_end();
       I != E; ++I) {

    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
    unsigned Limit;

    if (isSGPRClass(*I)) {
      Limit = SGPRLimit / NumSubRegs;
    } else {
      Limit = VGPRLimit / NumSubRegs;
    }

    const int *Sets = getRegClassPressureSets(*I);
    assert(Sets);
    for (unsigned i = 0; Sets[i] != -1; ++i) {
	    if (Sets[i] == (int)Idx)
        return Limit;
    }
  }
  return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -455,3 +478,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
  return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
  switch(WaveCount) {
    case 10: return 24;
    case 9:  return 28;
    case 8:  return 32;
    case 7:  return 36;
    case 6:  return 40;
    case 5:  return 48;
    case 4:  return 64;
    case 3:  return 84;
    case 2:  return 128;
    default: return 256;
  }
}

unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
  switch(WaveCount) {
    case 10: return 48;
    case 9:  return 56;
    case 8:  return 64;
    case 7:  return 72;
    case 6:  return 80;
    case 5:  return 96;
    default: return 103;
  }
}
+10 −2
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H

#include "AMDGPURegisterInfo.h"
#include "llvm/Support/Debug.h"

namespace llvm {

@@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {

  BitVector getReservedRegs(const MachineFunction &MF) const override;

  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                               MachineFunction &MF) const override;
  unsigned getRegPressureSetLimit(unsigned Idx) const override;

  bool requiresRegisterScavenging(const MachineFunction &Fn) const override;

@@ -105,6 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
  unsigned getPreloadedValue(const MachineFunction &MF,
                             enum PreloadedValue Value) const;

  /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
  ///        concurrent waves.
  unsigned getNumVGPRsAllowed(unsigned WaveCount) const;

  /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
  ///        concurrent waves.
  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;

  unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
                              const TargetRegisterClass *RC) const;

+2 −2
Original line number Diff line number Diff line
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add

; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a

; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
Loading