Commit 02746d14 authored by Tom Stellard's avatar Tom Stellard
Browse files

Merging r260651:

------------------------------------------------------------------------
r260651 | Matthew.Arsenault | 2016-02-11 18:40:47 -0800 (Thu, 11 Feb 2016) | 7 lines

AMDGPU: Set element_size in private resource descriptor

Introduce a subtarget feature for this, and leave the default with
the current behavior which assumes up to 16-byte loads/stores can
be used. The field also seems to have the ability to be set to 2 bytes,
but I'm not sure what that would be used for.

------------------------------------------------------------------------

llvm-svn: 271679
parent 2fd90848
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -206,6 +206,17 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
        "true",
        "Additional intstructions for CI+">;

class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size,
  "MaxPrivateElementSize",
  !cast<string>(size),
  "Maximum private access size may be "#size
>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
                                      "FeatureDisable","true",
+19 −0
Original line number Diff line number Diff line
@@ -593,6 +593,20 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
  }
}

// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
  switch (Size) {
  case 4:
    return AMD_ELEMENT_4_BYTES;
  case 8:
    return AMD_ELEMENT_8_BYTES;
  case 16:
    return AMD_ELEMENT_16_BYTES;
  default:
    llvm_unreachable("invalid private_element_size");
  }
}

void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
                                         const SIProgramInfo &KernelInfo) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -606,6 +620,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
      (KernelInfo.ComputePGMRSrc2 << 32);
  header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;


  AMD_HSA_BITS_SET(header.code_properties,
                   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                   getElementByteSizeValue(STM.getMaxPrivateElementSize()));

  if (MFI->hasPrivateSegmentBuffer()) {
    header.code_properties |=
      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+6 −0
Original line number Diff line number Diff line
@@ -58,6 +58,11 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
    FP32Denormals = false;
    FP64Denormals = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 16;

  return *this;
}

@@ -72,6 +77,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
      EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
      EnableXNACK(false),
      WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
      MaxPrivateElementSize(0),
      EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
      GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
      IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
+5 −0
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@ private:
  unsigned WavefrontSize;
  bool CFALUBug;
  int LocalMemorySize;
  unsigned MaxPrivateElementSize;
  bool EnableVGPRSpilling;
  bool SGPRInitBug;
  bool IsGCN;
@@ -243,6 +244,10 @@ public:
    return LocalMemorySize;
  }

  unsigned getMaxPrivateElementSize() const {
    return MaxPrivateElementSize;
  }

  bool hasSGPRInitBug() const {
    return SGPRInitBug;
  }
+9 −0
Original line number Diff line number Diff line
@@ -44,6 +44,15 @@ enum amd_code_version_t {
  AMD_CODE_VERSION_MINOR = 1
};

// Sets val bits for specified mask in specified dst packed instance.
#define AMD_HSA_BITS_SET(dst, mask, val)                                       \
  dst &= (~(1 << mask ## _SHIFT) & ~mask);                                     \
  dst |= (((val) << mask ## _SHIFT) & mask)

// Gets bits for specified mask from specified src packed instance.
#define AMD_HSA_BITS_GET(src, mask)                                            \
  ((src & mask) >> mask ## _SHIFT)                                             \

/// The values used to define the number of bytes to use for the
/// swizzle element size.
enum amd_element_byte_size_t {
Loading