Unverified Commit 6cfb6427 authored by Konstantin Zhuravlyov's avatar Konstantin Zhuravlyov Committed by GitHub
Browse files

AMDGPU: Minor updates to program resource registers (#69525)



- Be explicit about which program resource register is supported by
which target
    - RSRC1
      - FP16_OVFL is GFX9+
      - WGP_MODE is GFX10+
      - MEM_ORDERED is GFX10+
      - FWD_PROGRESS is GFX10+
    - RSRC3
      - INST_PREF_SIZE is GFX11+
      - TRAP_ON_START is GFX11+
      - TRAP_ON_END is GFX11+
      - IMAGE_OP is GFX11+
  - Do not emit GFX11+ fields when disassembling GFX10 code objects
  - Tighten enforcement of reserved bits in disassembler

---------

Co-authored-by: default avatarKonstantin Zhuravlyov <kzhuravl@amd.com>
parent 7b9fb1c2
Loading
Loading
Loading
Loading
+35 −11
Original line number Diff line number Diff line
@@ -79,8 +79,21 @@ enum : uint8_t {
};

// Compute program resource register 1. Must match hardware definition.
// GFX6+.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX8].
#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX9].
#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
// GFX9+.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -95,11 +108,13 @@ enum : int32_t {
  COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
  COMPUTE_PGM_RSRC1(BULKY, 24, 1),
  COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
  COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
  COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
  COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
  COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
  COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
  COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
  COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
  COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
};
#undef COMPUTE_PGM_RSRC1

@@ -143,15 +158,24 @@ enum : int32_t {

// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
// [GFX10].
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX11+.
#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
  COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
  COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
  COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
  COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
  COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS

+4 −4
Original line number Diff line number Diff line
@@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
@@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
+34 −14
Original line number Diff line number Diff line
@@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
  if (!isGFX9Plus())
    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
      return MCDisassembler::Fail;
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
    return MCDisassembler::Fail;
  if (!isGFX10Plus())
    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
      return MCDisassembler::Fail;

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }
  return MCDisassembler::Success;
}
@@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
    }

    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
        return MCDisassembler::Fail;
    }

    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
      return MCDisassembler::Fail;

    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
    } else {
      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
        return MCDisassembler::Fail;
    }
  } else if (FourByteBuffer) {
    return MCDisassembler::Fail;
  }
+4 −4
Original line number Diff line number Diff line
@@ -453,7 +453,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
  if (IVersion.Major >= 9)
    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
  if (AMDGPU::isGFX90A(STI))
    PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
                compute_pgm_rsrc3,
@@ -461,13 +461,13 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
  if (IVersion.Major >= 10) {
    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
    PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
                amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
  }
+2 −2
Original line number Diff line number Diff line
@@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
Loading