Commit 87568691, authored by Sebastian Neubauer
Browse files

[AMDGPU] Add a16 feature to gfx10

Based on D72931

This adds a new feature called A16 which is enabled for gfx10.
gfx9 keeps the R128A16 feature so it can share all the instruction encodings
with gfx7/8.

Differential Revision: https://reviews.llvm.org/D73956
parent d2e434a4
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -360,7 +360,13 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
// Subtarget feature selected by the "r128-a16" feature string; it sets the
// HasR128A16 subtarget field to "true".
// NOTE(review): two description strings appear back to back below. This looks
// like the previous and the updated wording of the same line shown together
// (diff markers stripped) -- confirm that only the second one is intended.
def FeatureR128A16 : SubtargetFeature<"r128-a16",
  "HasR128A16",
  "true",
  "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
>;

// Subtarget feature selected by the "a16" feature string; it sets the
// HasGFX10A16 subtarget field to "true". This is the gfx10 flavour of A16 and
// is kept separate from FeatureR128A16, which covers the "r128-a16" feature.
def FeatureGFX10A16 : SubtargetFeature<"a16",
  "HasGFX10A16",
  "true",
  "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
>;

def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
@@ -682,7 +688,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
   FeatureVOP3Literal, FeatureDPP8,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
   FeatureGFX10A16
  ]
>;

@@ -1094,6 +1101,9 @@ def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
// Predicate that tests Subtarget->hasR128A16(); the assembler-side predicate
// is keyed on FeatureR128A16.
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
  AssemblerPredicate<"FeatureR128A16">;

// Predicate that tests Subtarget->hasGFX10A16(); the assembler-side predicate
// is keyed on FeatureGFX10A16.
def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
  AssemblerPredicate<"FeatureGFX10A16">;

// Predicate that tests Subtarget->hasDPP(). The assembler-side predicate lists
// FeatureGFX10Insts and FeatureDPP, plus FeatureGCN3Encoding with a leading '!'
// (presumably meaning that feature must be absent -- confirm against the
// AssemblerPredicate string syntax).
def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;

+1 −0
Original line number Diff line number Diff line
@@ -241,6 +241,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    HasDPP(false),
    HasDPP8(false),
    HasR128A16(false),
    HasGFX10A16(false),
    HasNSAEncoding(false),
    HasDLInsts(false),
    HasDot1Insts(false),
+5 −0
Original line number Diff line number Diff line
@@ -342,6 +342,7 @@ protected:
  bool HasDPP;
  bool HasDPP8;
  bool HasR128A16;
  bool HasGFX10A16;
  bool HasNSAEncoding;
  bool HasDLInsts;
  bool HasDot1Insts;
@@ -992,6 +993,10 @@ public:
    return HasR128A16;
  }

  /// \returns the value of the HasGFX10A16 subtarget flag (the gfx10-style
  /// A16 feature).
  bool hasGFX10A16() const { return HasGFX10A16; }

  /// \returns the value of the HasOffset3fBug subtarget flag.
  bool hasOffset3fBug() const { return HasOffset3fBug; }
+15 −3
Original line number Diff line number Diff line
@@ -163,6 +163,7 @@ public:
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
@@ -315,6 +316,7 @@ public:
  // Operand-kind predicates: each one forwards to isImmTy() with the matching
  // ImmTy enumerator and returns its result.
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  // Note the naming: the gfx10 A16 predicate tests ImmTyA16 (the enumerator
  // carries no GFX10 prefix).
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -847,6 +849,7 @@ public:
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1157,6 +1160,10 @@ public:
    return AMDGPU::hasPackedD16(getSTI());
  }

  /// \returns the result of AMDGPU::hasGFX10A16 for this parser's subtarget
  /// info (getSTI()).
  bool hasGFX10A16() const { return AMDGPU::hasGFX10A16(getSTI()); }

  /// \returns the result of AMDGPU::isSI for this parser's subtarget info
  /// (getSTI()).
  bool isSI() const { return AMDGPU::isSI(getSTI()); }
@@ -4650,9 +4657,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          if (Tok == "r128" && isGFX9())
          if (Tok == "r128" && !hasMIMG_R128())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9() && !isGFX10())
          if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
@@ -4672,6 +4679,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
@@ -5987,6 +5997,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
@@ -6096,7 +6108,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
+5 −0
Original line number Diff line number Diff line
@@ -244,6 +244,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
    printNamedBit(MI, OpNo, O, "r128");
}

/// Emit the "a16" named bit for operand \p OpNo of \p MI by delegating to
/// printNamedBit, writing to \p O. \p STI is accepted but not used here.
void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "a16");
}

void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "lwe");
Loading