Unverified Commit ab6c3d50 authored by Stanislav Mekhanoshin's avatar Stanislav Mekhanoshin Committed by GitHub
Browse files

[AMDGPU] Change the representation of double literals in operands (#68740)

A 64-bit literal can be used as a 32-bit zero- or sign-extended operand.
In the case of a double, zeroes are added to the low 32 bits. Currently the
asm parser stores only the high 32 bits of a double in an operand. To
support codegen as requested in
https://github.com/llvm/llvm-project/issues/67781, we need to change the
representation to store the full 64-bit value so that codegen can simply
add immediates to an instruction.

There is some code to preserve compatibility with existing tests and asm
kernels: a short hex string that represents only the high 32 bits of a
double value is still accepted as a valid literal.
parent b3a39a9b
Loading
Loading
Loading
Loading
+18 −3
Original line number Diff line number Diff line
@@ -2141,9 +2141,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        Inst.addOperand(MCOperand::createImm(Val));
        setImmKindLiteral();
        return;
      }
@@ -2242,7 +2243,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? Val << 32 : Lo_32(Val);

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindLiteral();
    return;

@@ -4309,7 +4312,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
      continue;

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      uint64_t Value = static_cast<uint64_t>(MO.getImm());
      bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        return false;
      }

      if (IsFP64 && IsValid32Op)
        Value = Hi_32(Value);

      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
+20 −7
Original line number Diff line number Diff line
@@ -378,6 +378,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

/// Decode a 9-bit source operand encoding as a 64-bit wide floating-point
/// source operand and append the decoded operand to \p Inst.
///
/// The operand is decoded with an operand width of OPW64, an immediate
/// width of 64 and the FP flag set; literals are not treated as mandatory.
/// \p Addr is not used by this decoder.
///
/// \param Inst    Instruction that receives the decoded operand.
/// \param Imm     Raw operand encoding; must fit in 9 bits.
/// \param Decoder Disassembler instance; must be an AMDGPUDisassembler.
/// \returns the status reported by addOperand().
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);
  auto SrcOp = Disasm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm,
                                   /*MandatoryLiteral=*/false,
                                   /*ImmWidth=*/64, /*IsFP=*/true);
  return addOperand(Inst, SrcOp);
}

static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
@@ -1219,7 +1228,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
@@ -1229,9 +1238,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(Literal);
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1448,7 +1459,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth) const {
                                          unsigned ImmWidth, bool IsFP) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10
@@ -1460,13 +1471,15 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            IsFP);
}

MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
                                                 unsigned Val,
                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth) const {
                                                 unsigned ImmWidth,
                                                 bool IsFP) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
@@ -1494,7 +1507,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant();
      return decodeLiteralConstant(IsFP && ImmWidth == 64);
  }

  switch (Width) {
+5 −4
Original line number Diff line number Diff line
@@ -97,6 +97,7 @@ private:
  const unsigned TargetMaxInstBytes;
  mutable ArrayRef<uint8_t> Bytes;
  mutable uint32_t Literal;
  mutable uint64_t Literal64;
  mutable bool HasLiteral;
  mutable std::optional<bool> EnableWavefrontSize32;

@@ -229,15 +230,15 @@ public:
  static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);

  MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
  MCOperand decodeLiteralConstant() const;
  MCOperand decodeLiteralConstant(bool ExtendFP64) const;

  MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
                        bool MandatoryLiteral = false,
                        unsigned ImmWidth = 0) const;
                        bool MandatoryLiteral = false, unsigned ImmWidth = 0,
                        bool IsFP = false) const;

  MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                               bool MandatoryLiteral = false,
                               unsigned ImmWidth = 0) const;
                               unsigned ImmWidth = 0, bool IsFP = false) const;

  MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
  MCOperand decodeSpecialReg32(unsigned Val) const;
+10 −5
Original line number Diff line number Diff line
@@ -426,7 +426,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,

void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
                                         raw_ostream &O, bool IsFP) {
  int64_t SImm = static_cast<int64_t>(Imm);
  if (SImm >= -16 && SImm <= 64) {
    O << SImm;
@@ -454,7 +454,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
  else if (Imm == 0x3fc45f306dc9c882 &&
           STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
    O << "0.15915494309189532";
  else {
  else if (IsFP) {
    assert(AMDGPU::isValid32BitLiteral(Imm, true));
    O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
  } else {
    assert(isUInt<32>(Imm) || isInt<32>(Imm));

    // In rare situations, we will have a 32-bit literal in a 64-bit
@@ -605,11 +608,13 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
      printImmediate32(Op.getImm(), STI, O);
      break;
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
      printImmediate64(Op.getImm(), STI, O, false);
      break;
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      printImmediate64(Op.getImm(), STI, O);
      printImmediate64(Op.getImm(), STI, O, true);
      break;
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -671,7 +676,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
      if (RCBits == 32)
        printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
      else if (RCBits == 64)
        printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
        printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
      else
        llvm_unreachable("Invalid register class size");
    }
+1 −1
Original line number Diff line number Diff line
@@ -91,7 +91,7 @@ private:
  void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
                        raw_ostream &O);
  void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
                        raw_ostream &O);
                        raw_ostream &O, bool IsFP);
  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                    raw_ostream &O);
  void printRegularOperand(const MCInst *MI, unsigned OpNo,
Loading