diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:23:40 +0000 |
commit | bdbe302c3396ceb4dd89d1214485439598f05368 (patch) | |
tree | ccf66c6349b23061ed5e9645c21f15fbe718da8b /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | |
parent | e7a1904fe1ced461b2a31f03b6592ae6564a243a (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 324 |
1 files changed, 273 insertions, 51 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1b05acd5c90a..ed2e7e4f189e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); int64_t Offset; - if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. + if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets. + Offset = SignExtend64<24>(Imm); + } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. Offset = Imm & 0xFFFFF; - } else { // GFX9+ supports 21-bit signed offsets. + } else { // GFX9+ supports 21-bit signed offsets. Offset = SignExtend64<21>(Imm); } return addOperand(Inst, MCOperand::createImm(Offset)); @@ -105,6 +107,13 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, return addOperand(Inst, DAsm->decodeBoolReg(Val)); } +static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, + uint64_t Addr, + const MCDisassembler *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeSplitBarrier(Val)); +} + #define DECODE_OPERAND(StaticDecoderName, DecoderName) \ static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \ uint64_t /*Addr*/, \ @@ -200,10 +209,12 @@ DECODE_OPERAND_REG_8(VReg_512) DECODE_OPERAND_REG_8(VReg_1024) DECODE_OPERAND_REG_7(SReg_32, OPW32) +DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32) DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32) DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32) DECODE_OPERAND_REG_7(SReg_64, OPW64) DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64) +DECODE_OPERAND_REG_7(SReg_96, OPW96) DECODE_OPERAND_REG_7(SReg_128, OPW128) DECODE_OPERAND_REG_7(SReg_256, OPW256) DECODE_OPERAND_REG_7(SReg_512, OPW512) @@ -238,6 +249,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128) DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64) DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16) @@ -259,6 +271,62 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32) + +static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<10>(Imm) && "10-bit encoding expected"); + assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used"); + + bool IsHi = Imm & (1 << 9); + unsigned RegIdx = Imm & 0xff; + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); +} + +static DecodeStatus +DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<8>(Imm) && "8-bit encoding expected"); + + bool IsHi = Imm & (1 << 7); + unsigned RegIdx = Imm & 0x7f; + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); +} + +static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<9>(Imm) && "9-bit encoding expected"); + + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + bool IsVGPR = Imm & (1 << 8); + if (IsVGPR) { + bool IsHi = Imm & (1 << 7); + unsigned RegIdx = Imm & 0x7f; + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); + } + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16, + Imm & 0xFF, false, 16)); +} + +static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<10>(Imm) && "10-bit encoding expected"); + + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + bool IsVGPR = Imm & (1 << 8); + if (IsVGPR) { + bool IsHi = Imm & (1 << 9); + unsigned RegIdx = Imm & 0xff; + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); + } + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16, + Imm & 0xFF, false, 16)); +} static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -321,6 +389,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); } +static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { + assert(Imm < (1 << 9) && "9-bit encoding"); + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true)); +} + static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { @@ -371,18 +448,19 @@ DECODE_SDWA(VopcDst) template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) { assert(Bytes.size() >= sizeof(T)); - const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data()); + const auto Res = + support::endian::read<T, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(sizeof(T)); return Res; } static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) { assert(Bytes.size() >= 12); - uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>( - Bytes.data()); + uint64_t Lo = + support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(8); - uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>( - Bytes.data()); + uint64_t Hi = + support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(4); return DecoderUInt128(Lo, Hi); } @@ -418,25 +496,48 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // encodings if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); - Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS); + Res = + tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696, + MI, DecW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS); - if (Res) { - if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) + Res = + tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696, + MI, DecW, Address, CS); + if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + break; + MI = MCInst(); // clear + + const auto convertVOPDPP = [&]() { + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) { convertVOP3PDPPInst(MI); - else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) + } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) { convertVOPCDPPInst(MI); // Special VOP3 case - else { + } else { assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3); convertVOP3DPPInst(MI); // Regular VOP3 case } + }; + Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696, + MI, DecW, Address, CS); + if (Res) { + convertVOPDPP(); + break; + } + Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696, + MI, DecW, Address, CS); + if (Res) { + convertVOPDPP(); break; } Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS); if (Res) break; + + Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS); + if (Res) + break; } // Reinitialize Bytes Bytes = Bytes_.slice(0, MaxInstBytesNum); @@ -461,7 +562,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPP8GFX1164, + DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS); + if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + break; + MI = MCInst(); // clear + + Res = tryDecodeInst(DecoderTableDPP8GFX1264, + DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear @@ -469,7 +577,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664, + MI, QW, Address, CS); + if (Res) { + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) + convertVOPCDPPInst(MI); + break; + } + + Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664, + MI, QW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) convertVOPCDPPInst(MI); @@ -530,9 +647,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW, + Address, CS); if (Res) break; + Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW, + Address, CS); + if (Res) + break; + if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; @@ -560,7 +683,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW, + Address, CS); + if (Res) + break; + + Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW, + Address, CS); if (Res) break; @@ -640,6 +769,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = convertMIMGInst(MI); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))) + Res = convertMIMGInst(MI); + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) Res = convertEXPInst(MI); @@ -679,7 +812,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { - if (STI.hasFeature(AMDGPU::FeatureGFX11)) { + if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) { // The MCInst still has these fields even though they are no longer encoded // in the GFX11 instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); @@ -690,9 +823,13 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 || MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 || MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 || - MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) { + MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) { // The MCInst has this field that is not directly encoded in the // instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); @@ -840,6 +977,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { // VADDR size. Consequently, decoded instructions always show address as if it // has 1 dword, which could be not really so. DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + auto TSFlags = MCII->get(MI.getOpcode()).TSFlags; int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst); @@ -848,8 +986,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::vdata); int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); - int RsrcIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); + int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc + : AMDGPU::OpName::rsrc; + int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -870,7 +1009,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } bool IsAtomic = (VDstIdx != -1); - bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; + bool IsGather4 = TSFlags & SIInstrFlags::Gather4; + bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE; bool IsNSA = false; bool IsPartialNSA = false; unsigned AddrSize = Info->VAddrDwords; @@ -887,10 +1027,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); + // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms. + // VIMAGE insts other than BVH never use vaddr4. IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA || - Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA; + Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA || + Info->MIMGEncoding == AMDGPU::MIMGEncGfx12; if (!IsNSA) { - if (AddrSize > 12) + if (!IsVSample && AddrSize > 12) AddrSize = 16; } else { if (AddrSize > Info->VAddrDwords) { @@ -1098,6 +1241,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, case AMDGPU::TTMP_64RegClassID: shift = 1; break; + case AMDGPU::SGPR_96RegClassID: + case AMDGPU::TTMP_96RegClassID: case AMDGPU::SGPR_128RegClassID: case AMDGPU::TTMP_128RegClassID: // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in @@ -1132,6 +1277,13 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, return createRegOperand(SRegClassID, Val >> shift); } +MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx, + bool IsHi) const { + unsigned RCID = + IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID; + return createRegOperand(RCID, RegIdx); +} + // Decode Literals for insts which always have a literal in the encoding MCOperand AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { @@ -1147,7 +1299,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { return MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { +MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1157,9 +1309,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { Twine(Bytes.size())); } HasLiteral = true; - Literal = eatBytes<uint32_t>(Bytes); + Literal = Literal64 = eatBytes<uint32_t>(Bytes); + if (ExtendFP64) + Literal64 <<= 32; } - return MCOperand::createImm(Literal); + return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1376,7 +1530,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral, - unsigned ImmWidth) const { + unsigned ImmWidth, bool IsFP) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1388,6 +1542,20 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, return createRegOperand(IsAGPR ? getAgprClassId(Width) : getVgprClassId(Width), Val - VGPR_MIN); } + return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth, + IsFP); +} + +MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, + unsigned Val, + bool MandatoryLiteral, + unsigned ImmWidth, + bool IsFP) const { + // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been + // decoded earlier. + assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0"); + using namespace AMDGPU::EncValues; + if (Val <= SGPR_MAX) { // "SGPR_MIN <= Val" is always true and causes compilation warning. static_assert(SGPR_MIN == 0); @@ -1410,7 +1578,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, // Keep a sentinel value for deferred setting return MCOperand::createImm(LITERAL_CONST); else - return decodeLiteralConstant(); + return decodeLiteralConstant(IsFP && ImmWidth == 64); } switch (Width) { @@ -1590,6 +1758,10 @@ MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { : decodeSrcOp(OPW32, Val); } +MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const { + return decodeSrcOp(OPW32, Val); +} + bool AMDGPUDisassembler::isVI() const { return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); } @@ -1616,11 +1788,18 @@ bool AMDGPUDisassembler::isGFX11Plus() const { return AMDGPU::isGFX11Plus(STI); } +bool AMDGPUDisassembler::isGFX12Plus() const { + return AMDGPU::isGFX12Plus(STI); +} bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); } +bool AMDGPUDisassembler::hasKernargPreload() const { + return AMDGPU::hasKernargPreload(STI); +} + //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// @@ -1704,12 +1883,16 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) return MCDisassembler::Fail; - PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + if (!isGFX12Plus()) + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) return MCDisassembler::Fail; - PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + if (!isGFX12Plus()) + PRINT_DIRECTIVE(".amdhsa_ieee_mode", + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) return MCDisassembler::Fail; @@ -1717,17 +1900,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) return MCDisassembler::Fail; - PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + if (isGFX9Plus()) + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); - if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + if (!isGFX9Plus()) + if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0) + return MCDisassembler::Fail; + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1) return MCDisassembler::Fail; + if (!isGFX10Plus()) + if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2) + return MCDisassembler::Fail; if (isGFX10Plus()) { PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", - COMPUTE_PGM_RSRC1_WGP_MODE); - PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); - PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); } + + if (isGFX12Plus()) + PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling", + COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN); + return MCDisassembler::Success; } @@ -1807,16 +2002,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( PRINT_PSEUDO_DIRECTIVE_COMMENT( "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); } - PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", - COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE); - PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); - PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END); - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0) + + if (isGFX11Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", + COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END); + } else { + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0) + return MCDisassembler::Fail; + } + + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1) return MCDisassembler::Fail; - PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); + + if (isGFX11Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + } else { + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2) + return MCDisassembler::Fail; + } } else if (FourByteBuffer) { return MCDisassembler::Fail; } @@ -1945,10 +2153,24 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( return MCDisassembler::Success; - case amdhsa::RESERVED2_OFFSET: - // 6 bytes from here are reserved, must be 0. - ReservedBytes = DE.getBytes(Cursor, 6); - for (int I = 0; I < 6; ++I) { + case amdhsa::KERNARG_PRELOAD_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) { + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length", + KERNARG_PRELOAD_SPEC_LENGTH); + } + + if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) { + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset", + KERNARG_PRELOAD_SPEC_OFFSET); + } + return MCDisassembler::Success; + + case amdhsa::RESERVED3_OFFSET: + // 4 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 4); + for (int I = 0; I < 4; ++I) { if (ReservedBytes[I] != 0) return MCDisassembler::Fail; } @@ -1975,7 +2197,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( if (isGFX10Plus()) { uint16_t KernelCodeProperties = support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET], - support::endianness::little); + llvm::endianness::little); EnableWavefrontSize32 = AMDHSA_BITS_GET(KernelCodeProperties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); @@ -2018,7 +2240,7 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, // Code Object V3 kernel descriptors. StringRef Name = Symbol.Name; - if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) { Size = 64; // Size = 64 regardless of success or failure. return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); } |