diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-07-27 23:34:35 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-12-01 12:32:12 +0000 |
commit | fbc266465ed3585efdbd8e9ebf71e97ce7e8b464 (patch) | |
tree | 7560c2cbec09e542e5f2e2100ffc16ca742b1075 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | |
parent | ac8517f04c0fe31968ed43e36608ad02d72d3597 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 1131 |
1 files changed, 601 insertions, 530 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index a9968cfe25b4..1a0dc7098347 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -21,6 +21,7 @@ #include "SIDefines.h" #include "SIRegisterInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm-c/DisassemblerTypes.h" #include "llvm/BinaryFormat/ELF.h" @@ -44,13 +45,41 @@ using namespace llvm; using DecodeStatus = llvm::MCDisassembler::DecodeStatus; +static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI, + MCContext &Ctx) { + if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) && + !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) { + MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI); + // If there is no default wave size it must be a generation before gfx10, + // these have FeatureWavefrontSize64 in their definition already. For gfx10+ + // set wave32 as a default. + STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32); + return STICopy; + } + + return STI; +} + AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) - : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), - MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) { + : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII), + MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()), + TargetMaxInstBytes(MAI.getMaxInstLength(&STI)), + CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) { // ToDo: AMDGPUDisassembler supports only VI ISA. if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus()) report_fatal_error("Disassembly not yet supported for subtarget"); + + for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions()) + createConstantSymbolExpr(Symbol, Code); + + UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000); + UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000); + UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000); +} + +void AMDGPUDisassembler::setABIVersion(unsigned Version) { + CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version); } inline static MCDisassembler::DecodeStatus @@ -114,6 +143,12 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, return addOperand(Inst, DAsm->decodeSplitBarrier(Val)); } +static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, + const MCDisassembler *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeDpp8FI(Val)); +} + #define DECODE_OPERAND(StaticDecoderName, DecoderName) \ static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \ uint64_t /*Addr*/, \ @@ -145,6 +180,18 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, MandatoryLiteral, ImmWidth)); \ } +static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, + AMDGPUDisassembler::OpWidthTy OpWidth, + unsigned Imm, unsigned EncImm, + bool MandatoryLiteral, unsigned ImmWidth, + AMDGPU::OperandSemantics Sema, + const MCDisassembler *Decoder) { + assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!"); + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral, + ImmWidth, Sema)); +} + // Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to // get register class. Used by SGPR only operands. #define DECODE_OPERAND_REG_7(RegClass, OpWidth) \ @@ -154,46 +201,75 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, // Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC). // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp. // Used by AV_ register classes (AGPR or VGPR only register operands). -#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \ - DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \ - Imm | AMDGPU::EncValues::IS_VGPR, false, 0) +template <AMDGPUDisassembler::OpWidthTy OpWidth> +static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR, + false, 0, AMDGPU::OperandSemantics::INT, Decoder); +} // Decoder for Src(9-bit encoding) registers only. -#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0) +template <AMDGPUDisassembler::OpWidthTy OpWidth> +static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0, + AMDGPU::OperandSemantics::INT, Decoder); +} // Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set // Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers // only. -#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0) +template <AMDGPUDisassembler::OpWidthTy OpWidth> +static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0, + AMDGPU::OperandSemantics::INT, Decoder); +} // Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding // Imm{9} is acc, registers only. -#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0) +template <AMDGPUDisassembler::OpWidthTy OpWidth> +static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0, + AMDGPU::OperandSemantics::INT, Decoder); +} // Decoder for RegisterOperands using 9-bit Src encoding. Operand can be // register from RegClass or immediate. Registers that don't belong to RegClass // will be decoded and InstPrinter will report warning. Immediate will be // decoded into constant of size ImmWidth, should match width of immediate used // by OperandType (important for floating point types). -#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \ - false, ImmWidth) - -#define DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(Name, OpWidth, ImmWidth) \ - DECODE_SrcOp(decodeOperand_##Name, 9, OpWidth, Imm, false, ImmWidth) +template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, + unsigned OperandSemantics> +static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth, + (AMDGPU::OperandSemantics)OperandSemantics, Decoder); +} // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc) // and decode using 'enum10' from decodeSrcOp. -#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \ - Imm | 512, false, ImmWidth) - -#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \ - DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \ - OpWidth, Imm, true, ImmWidth) +template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, + unsigned OperandSemantics> +static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth, + (AMDGPU::OperandSemantics)OperandSemantics, Decoder); +} + +template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, + unsigned OperandSemantics> +static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth, + (AMDGPU::OperandSemantics)OperandSemantics, Decoder); +} // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass' // when RegisterClass is used as an operand. Most often used for destination @@ -229,60 +305,6 @@ DECODE_OPERAND_REG_8(AReg_256) DECODE_OPERAND_REG_8(AReg_512) DECODE_OPERAND_REG_8(AReg_1024) -DECODE_OPERAND_REG_AV10(AVDst_128, OPW128) -DECODE_OPERAND_REG_AV10(AVDst_512, OPW512) - -// Decoders for register only source RegisterOperands that use use 9-bit Src -// encoding: 'decodeOperand_<RegClass>'. - -DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32) -DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64) -DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128) -DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256) -DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32) - -DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32) - -DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32) -DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64) -DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128) - -// Decoders for register or immediate RegisterOperands that use 9-bit Src -// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'. - -DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32) - -DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2I16, OPW32, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2F16, OPW32, 16) - -DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64) -DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32) - -DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16) -DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32) -DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32) - static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, const MCDisassembler *Decoder) { @@ -365,9 +387,9 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx, return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; } -static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, - AMDGPUDisassembler::OpWidthTy Opw, - const MCDisassembler *Decoder) { +static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, + AMDGPUDisassembler::OpWidthTy Opw, + const MCDisassembler *Decoder) { auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); if (!DAsm->isGFX90A()) { Imm &= 511; @@ -399,48 +421,21 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); } +template <AMDGPUDisassembler::OpWidthTy Opw> +static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + return decodeAVLdSt(Inst, Imm, Opw, Decoder); +} + static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { assert(Imm < (1 << 9) && "9-bit encoding"); auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); - return addOperand( - Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true)); -} - -static DecodeStatus -DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - return decodeOperand_AVLdSt_Any(Inst, Imm, - AMDGPUDisassembler::OPW32, Decoder); -} - -static DecodeStatus -DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - return decodeOperand_AVLdSt_Any(Inst, Imm, - AMDGPUDisassembler::OPW64, Decoder); -} - -static DecodeStatus -DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - return decodeOperand_AVLdSt_Any(Inst, Imm, - AMDGPUDisassembler::OPW96, Decoder); -} - -static DecodeStatus -DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - return decodeOperand_AVLdSt_Any(Inst, Imm, - AMDGPUDisassembler::OPW128, Decoder); -} - -static DecodeStatus -DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160, - Decoder); + return addOperand(Inst, + DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, + AMDGPU::OperandSemantics::FP64)); } #define DECODE_SDWA(DecName) \ @@ -450,6 +445,13 @@ DECODE_SDWA(Src32) DECODE_SDWA(Src16) DECODE_SDWA(VopcDst) +static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, + uint64_t /* Addr */, + const MCDisassembler *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeVersionImm(Imm)); +} + #include "AMDGPUGenDisassemblerTables.inc" //===----------------------------------------------------------------------===// @@ -475,29 +477,17 @@ static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) { return DecoderUInt128(Lo, Hi); } -// The disassembler is greedy, so we need to check FI operand value to -// not parse a dpp if the correct literal is not set. For dpp16 the -// autogenerated decoder checks the dpp literal -static bool isValidDPP8(const MCInst &MI) { - using namespace llvm::AMDGPU::DPP; - int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi); - assert(FiIdx != -1); - if ((unsigned)FiIdx >= MI.getNumOperands()) - return false; - unsigned Fi = MI.getOperand(FiIdx).getImm(); - return Fi == DPP8_FI_0 || Fi == DPP8_FI_1; -} - DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes_, uint64_t Address, raw_ostream &CS) const { - bool IsSDWA = false; - unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size()); Bytes = Bytes_.slice(0, MaxInstBytesNum); - DecodeStatus Res = MCDisassembler::Fail; + // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless + // there are fewer bytes left). This will be overridden on success. + Size = std::min((size_t)4, Bytes_.size()); + do { // ToDo: better to switch encoding length using some bit predicate // but it is unknown yet, so try all we can @@ -506,234 +496,158 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // encodings if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); - Res = - tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696, - MI, DecW, Address, CS); - if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) - break; - MI = MCInst(); // clear - Res = - tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696, - MI, DecW, Address, CS); - if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) - break; - MI = MCInst(); // clear - - const auto convertVOPDPP = [&]() { - if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) { - convertVOP3PDPPInst(MI); - } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) { - convertVOPCDPPInst(MI); // Special VOP3 case - } else { - assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3); - convertVOP3DPPInst(MI); // Regular VOP3 case - } - }; - Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696, - MI, DecW, Address, CS); - if (Res) { - convertVOPDPP(); - break; - } - Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696, - MI, DecW, Address, CS); - if (Res) { - convertVOPDPP(); - break; - } - Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS); - if (Res) + + if (isGFX11() && + tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI, + DecW, Address, CS)) break; - Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS); - if (Res) + if (isGFX12() && + tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI, + DecW, Address, CS)) break; - Res = tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS); - if (Res) + if (isGFX12() && + tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS)) break; + + // Reinitialize Bytes + Bytes = Bytes_.slice(0, MaxInstBytesNum); } - // Reinitialize Bytes - Bytes = Bytes_.slice(0, MaxInstBytesNum); if (Bytes.size() >= 8) { const uint64_t QW = eatBytes<uint64_t>(Bytes); - if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) { - Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS); - if (Res) { - if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) - == -1) - break; - if (convertDPP8Inst(MI) == MCDisassembler::Success) - break; - MI = MCInst(); // clear - } - } + if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) && + tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS); - if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && + tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS)) break; - MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1164, - DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS); - if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and + // v_mad_mixhi_f16 for FMA variants. Try to decode using this special + // table first so we print the correct name. + if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) && + tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS)) break; - MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1264, - DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS); - if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) + if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) && + tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS)) break; - MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS); - if (Res) break; + if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) && + tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664, - MI, QW, Address, CS); - if (Res) { - if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) - convertVOPCDPPInst(MI); + if ((isVI() || isGFX9()) && + tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS)) break; - } - Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664, - MI, QW, Address, CS); - if (Res) { - if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) - convertVOPCDPPInst(MI); + if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS)) break; - } - Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS); - if (Res) { IsSDWA = true; break; } + if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS); - if (Res) { IsSDWA = true; break; } + if (isGFX12() && + tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW, + Address, CS)) + break; - Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS); - if (Res) { IsSDWA = true; break; } + if (isGFX11() && + tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW, + Address, CS)) + break; - if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) { - Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS); - if (Res) - break; - } + if (isGFX11() && + tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS)) + break; - // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and - // v_mad_mixhi_f16 for FMA variants. Try to decode using this special - // table first so we print the correct name. - if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) { - Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS); - if (Res) - break; - } - } + if (isGFX12() && + tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS)) + break; - // Reinitialize Bytes as DPP64 could have eaten too much - Bytes = Bytes_.slice(0, MaxInstBytesNum); + // Reinitialize Bytes + Bytes = Bytes_.slice(0, MaxInstBytesNum); + } // Try decode 32-bit instruction - if (Bytes.size() < 4) break; - const uint32_t DW = eatBytes<uint32_t>(Bytes); - Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS); - if (Res) break; - - Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS); - if (Res) break; + if (Bytes.size() >= 4) { + const uint32_t DW = eatBytes<uint32_t>(Bytes); - Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS); - if (Res) break; - - if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) { - Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS); - if (Res) + if ((isVI() || isGFX9()) && + tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS)) break; - } - if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) { - Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS); - if (Res) break; - } + if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS); - if (Res) break; + if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW, - Address, CS); - if (Res) break; + if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) && + tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS)) + break; - Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW, - Address, CS); - if (Res) - break; + if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) && + tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS)) + break; - if (Bytes.size() < 4) break; - const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; + if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS)) + break; - if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) { - Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS); - if (Res) + if (isGFX11() && + tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW, + Address, CS)) break; - } - if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) { - Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS); - if (Res) + if (isGFX12() && + tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW, + Address, CS)) break; } - Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS); - if (Res) break; - - Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS); - if (Res) break; - - Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS); - if (Res) break; - - Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS); - if (Res) break; - - Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW, - Address, CS); - if (Res) - break; - - Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW, - Address, CS); - if (Res) - break; + return MCDisassembler::Fail; + } while (false); - Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS); - if (Res) - break; + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) { + if (isMacDPP(MI)) + convertMacDPPInst(MI); - Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS); - } while (false); + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) + convertVOP3PDPPInst(MI); + else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) || + AMDGPU::isVOPC64DPP(MI.getOpcode())) + convertVOPCDPPInst(MI); // Special VOP3 case + else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) != + -1) + convertDPP8Inst(MI); + else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) + convertVOP3DPPInst(MI); // Regular VOP3 case + } - if (Res && AMDGPU::isMAC(MI.getOpcode())) { + if (AMDGPU::isMAC(MI.getOpcode())) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); } - if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp || - MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) { + if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp || + MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) && + if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) && !AMDGPU::hasGDS(STI)) { insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds); } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) { + if (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) { int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cpol); if (CPolPos != -1) { @@ -749,9 +663,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) && - (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) { + if ((MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) && + (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) { // GFX90A lost TFE, its place is occupied by ACC. int TFEOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe); @@ -762,8 +676,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) { + if (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) { int SWZOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz); if (SWZOpIdx != -1) { @@ -773,7 +687,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) { int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); int RsrcIdx = @@ -781,36 +695,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1; if (VAddr0Idx >= 0 && NSAArgs > 0) { unsigned NSAWords = (NSAArgs + 3) / 4; - if (Bytes.size() < 4 * NSAWords) { - Res = MCDisassembler::Fail; - } else { - for (unsigned i = 0; i < NSAArgs; ++i) { - const unsigned VAddrIdx = VAddr0Idx + 1 + i; - auto VAddrRCID = - MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass; - MI.insert(MI.begin() + VAddrIdx, - createRegOperand(VAddrRCID, Bytes[i])); - } - Bytes = Bytes.slice(4 * NSAWords); + if (Bytes.size() < 4 * NSAWords) + return MCDisassembler::Fail; + for (unsigned i = 0; i < NSAArgs; ++i) { + const unsigned VAddrIdx = VAddr0Idx + 1 + i; + auto VAddrRCID = + MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass; + MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i])); } + Bytes = Bytes.slice(4 * NSAWords); } - if (Res) - Res = convertMIMGInst(MI); + convertMIMGInst(MI); } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))) - Res = convertMIMGInst(MI); + if (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)) + convertMIMGInst(MI); - if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) - Res = convertEXPInst(MI); + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP) + convertEXPInst(MI); - if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)) - Res = convertVINTERPInst(MI); + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP) + convertVINTERPInst(MI); - if (Res && IsSDWA) - Res = convertSDWAInst(MI); + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA) + convertSDWAInst(MI); int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); @@ -831,27 +741,23 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, int ImmLitIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK; - if (Res && ImmLitIdx != -1 && !IsSOPK) - Res = convertFMAanyK(MI, ImmLitIdx); + if (ImmLitIdx != -1 && !IsSOPK) + convertFMAanyK(MI, ImmLitIdx); - // if the opcode was not recognized we'll assume a Size of 4 bytes - // (unless there are fewer bytes left) - Size = Res ? (MaxInstBytesNum - Bytes.size()) - : std::min((size_t)4, Bytes_.size()); - return Res; + Size = MaxInstBytesNum - Bytes.size(); + return MCDisassembler::Success; } -DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { +void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) { // The MCInst still has these fields even though they are no longer encoded // in the GFX11 instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr); } - return MCDisassembler::Success; } -DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { +void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 || MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 || MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 || @@ -864,10 +770,9 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { // instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); } - return MCDisassembler::Success; } -DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { +void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.hasFeature(AMDGPU::FeatureGFX9) || STI.hasFeature(AMDGPU::FeatureGFX10)) { if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst)) @@ -884,7 +789,6 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); } } - return MCDisassembler::Success; } struct VOPModifiers { @@ -924,6 +828,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI, return Modifiers; } +// Instructions decode the op_sel/suffix bits into the src_modifier +// operands. Copy those bits into the src operands for true16 VGPRs. +void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const { + const unsigned Opc = MI.getOpcode(); + const MCRegisterClass &ConversionRC = + MRI.getRegClass(AMDGPU::VGPR_16RegClassID); + constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = { + {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers, + SISrcMods::OP_SEL_0}, + {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers, + SISrcMods::DST_OP_SEL}}}; + for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); + int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName); + if (OpIdx == -1 || OpModsIdx == -1) + continue; + MCOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) + continue; + if (!ConversionRC.contains(Op.getReg())) + continue; + unsigned OpEnc = MRI.getEncodingValue(Op.getReg()); + const MCOperand &OpMods = MI.getOperand(OpModsIdx); + unsigned ModVal = OpMods.getImm(); + if (ModVal & OpSelMask) { // isHi + unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK; + Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1)); + } + } +} + // MAC opcodes have special old and src2 operands. // src2 is tied to dst, while old is not tied (but assumed to be). bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const { @@ -953,64 +892,43 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const { AMDGPU::OpName::src2_modifiers); } -// We must check FI == literal to reject not genuine dpp8 insts, and we must -// first add optional MI operands to check FI -DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { +void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); - if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) { - convertVOP3PDPPInst(MI); - } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || - AMDGPU::isVOPC64DPP(Opc)) { - convertVOPCDPPInst(MI); - } else { - if (isMacDPP(MI)) - convertMacDPPInst(MI); - - int VDstInIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); - if (VDstInIdx != -1) - insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); + int VDstInIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); + if (VDstInIdx != -1) + insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); - if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || - MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12) - insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { + convertTrue16OpSel(MI); + auto Mods = collectVOPModifiers(MI); + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), + AMDGPU::OpName::op_sel); + } else { + // Insert dummy unused src modifiers. + if (MI.getNumOperands() < DescNumOps && + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src0_modifiers); - unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { - auto Mods = collectVOPModifiers(MI); - insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), - AMDGPU::OpName::op_sel); - } else { - // Insert dummy unused src modifiers. - if (MI.getNumOperands() < DescNumOps && - AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) - insertNamedMCOperand(MI, MCOperand::createImm(0), - AMDGPU::OpName::src0_modifiers); - - if (MI.getNumOperands() < DescNumOps && - AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) - insertNamedMCOperand(MI, MCOperand::createImm(0), - AMDGPU::OpName::src1_modifiers); - } + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) + insertNamedMCOperand(MI, MCOperand::createImm(0), + AMDGPU::OpName::src1_modifiers); } - return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail; } -DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { - if (isMacDPP(MI)) - convertMacDPPInst(MI); +void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { + convertTrue16OpSel(MI); int VDstInIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in); if (VDstInIdx != -1) insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in); - if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || - MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12) - insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2); - unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && @@ -1019,13 +937,12 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); } - return MCDisassembler::Success; } // Note that before gfx10, the MIMG encoding provided no information about // VADDR size. Consequently, decoded instructions always show address as if it // has 1 dword, which could be not really so. -DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { +void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { auto TSFlags = MCII->get(MI.getOpcode()).TSFlags; int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), @@ -1035,8 +952,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::vdata); int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); - int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc - : AMDGPU::OpName::rsrc; + int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc + : AMDGPU::OpName::rsrc; int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -1054,7 +971,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (BaseOpcode->BVH) { // Add A16 operand for intersect_ray instructions addOperand(MI, MCOperand::createImm(BaseOpcode->A16)); - return MCDisassembler::Success; + return; } bool IsAtomic = (VDstIdx != -1); @@ -1089,7 +1006,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) { // The NSA encoding does not contain enough operands for the // combination of base opcode / dimension. Should this be an error? - return MCDisassembler::Success; + return; } IsPartialNSA = true; } @@ -1108,12 +1025,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { DstSize += 1; if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) - return MCDisassembler::Success; + return; int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize); if (NewOpcode == -1) - return MCDisassembler::Success; + return; // Widen the register to the correct number of enabled channels. unsigned NewVdata = AMDGPU::NoRegister; @@ -1130,7 +1047,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (NewVdata == AMDGPU::NoRegister) { // It's possible to encode this such that the low register + enabled // components exceeds the register count. - return MCDisassembler::Success; + return; } } @@ -1148,7 +1065,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &MRI.getRegClass(AddrRCID)); if (!NewVAddrSA) - return MCDisassembler::Success; + return; } MI.setOpcode(NewOpcode); @@ -1169,14 +1086,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { MI.erase(MI.begin() + VAddr0Idx + AddrSize, MI.begin() + VAddr0Idx + Info->VAddrDwords); } - - return MCDisassembler::Success; } // Opsel and neg bits are used in src_modifiers and standalone operands. Autogen // decoder only adds to src_modifiers, so manually add the bits to the other // operands. -DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { +void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); auto Mods = collectVOPModifiers(MI, true); @@ -1201,12 +1116,10 @@ DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi), AMDGPU::OpName::neg_hi); - - return MCDisassembler::Success; } // Create dummy old operand and insert optional operands -DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { +void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); @@ -1223,11 +1136,9 @@ DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src1_modifiers); - return MCDisassembler::Success; } -DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI, - int ImmLitIdx) const { +void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const { assert(HasLiteral && "Should have decoded a literal"); const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); unsigned DescNumOps = Desc.getNumOperands(); @@ -1243,7 +1154,6 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI, IsDeferredOp) Op.setImm(Literal); } - return MCDisassembler::Success; } const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { @@ -1424,7 +1334,7 @@ static int64_t getInlineImmVal64(unsigned Imm) { } } -static int64_t getInlineImmVal16(unsigned Imm) { +static int64_t getInlineImmValF16(unsigned Imm) { switch (Imm) { case 240: return 0x3800; @@ -1449,9 +1359,40 @@ static int64_t getInlineImmVal16(unsigned Imm) { } } -MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) { - assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN - && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX); +static int64_t getInlineImmValBF16(unsigned Imm) { + switch (Imm) { + case 240: + return 0x3F00; + case 241: + return 0xBF00; + case 242: + return 0x3F80; + case 243: + return 0xBF80; + case 244: + return 0x4000; + case 245: + return 0xC000; + case 246: + return 0x4080; + case 247: + return 0xC080; + case 248: // 1 / (2 * PI) + return 0x3E22; + default: + llvm_unreachable("invalid fp inline imm"); + } +} + +static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) { + return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm) + : getInlineImmValF16(Imm); +} + +MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm, + AMDGPU::OperandSemantics Sema) { + assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN && + Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX); // ToDo: case 248: 1/(2*PI) - is allowed only on VI // ImmWidth 0 is a default case where operand should not allow immediates. @@ -1464,7 +1405,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) { case 64: return MCOperand::createImm(getInlineImmVal64(Imm)); case 16: - return MCOperand::createImm(getInlineImmVal16(Imm)); + return MCOperand::createImm(getInlineImmVal16(Imm, Sema)); default: llvm_unreachable("implement me"); } @@ -1578,7 +1519,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral, - unsigned ImmWidth, bool IsFP) const { + unsigned ImmWidth, + AMDGPU::OperandSemantics Sema) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1591,14 +1533,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, : getVgprClassId(Width), Val - VGPR_MIN); } return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth, - IsFP); + Sema); } -MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, - unsigned Val, - bool MandatoryLiteral, - unsigned ImmWidth, - bool IsFP) const { +MCOperand +AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, + bool MandatoryLiteral, unsigned ImmWidth, + AMDGPU::OperandSemantics Sema) const { // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been // decoded earlier. assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0"); @@ -1619,14 +1560,13 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, return decodeIntImmed(Val); if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(ImmWidth, Val); + return decodeFPImmed(ImmWidth, Val, Sema); if (Val == LITERAL_CONST) { if (MandatoryLiteral) // Keep a sentinel value for deferred setting return MCOperand::createImm(LITERAL_CONST); - else - return decodeLiteralConstant(IsFP && ImmWidth == 64); + return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64); } switch (Width) { @@ -1723,9 +1663,10 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } -MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, - const unsigned Val, - unsigned ImmWidth) const { +MCOperand +AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val, + unsigned ImmWidth, + AMDGPU::OperandSemantics Sema) const { using namespace AMDGPU::SDWA; using namespace AMDGPU::EncValues; @@ -1756,21 +1697,21 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, return decodeIntImmed(SVal); if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(ImmWidth, SVal); + return decodeFPImmed(ImmWidth, SVal, Sema); return decodeSpecialReg32(SVal); - } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) { - return createRegOperand(getVgprClassId(Width), Val); } + if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) + return createRegOperand(getVgprClassId(Width), Val); llvm_unreachable("unsupported target"); } MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { - return decodeSDWASrc(OPW16, Val, 16); + return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16); } MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { - return decodeSDWASrc(OPW32, Val, 32); + return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32); } MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { @@ -1789,15 +1730,13 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { if (TTmpIdx >= 0) { auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); return createSRegOperand(TTmpClsId, TTmpIdx); - } else if (Val > SGPR_MAX) { - return IsWave64 ? decodeSpecialReg64(Val) - : decodeSpecialReg32(Val); - } else { - return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val); } - } else { - return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO); + if (Val > SGPR_MAX) { + return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val); + } + return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val); } + return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO); } MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { @@ -1810,6 +1749,47 @@ MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const { return decodeSrcOp(OPW32, Val); } +MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const { + if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1) + return MCOperand(); + return MCOperand::createImm(Val); +} + +MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const { + using VersionField = AMDGPU::EncodingField<7, 0>; + using W64Bit = AMDGPU::EncodingBit<13>; + using W32Bit = AMDGPU::EncodingBit<14>; + using MDPBit = AMDGPU::EncodingBit<15>; + using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>; + + auto [Version, W64, W32, MDP] = Encoding::decode(Imm); + + // Decode into a plain immediate if any unused bits are raised. + if (Encoding::encode(Version, W64, W32, MDP) != Imm) + return MCOperand::createImm(Imm); + + const auto &Versions = AMDGPU::UCVersion::getGFXVersions(); + auto I = find_if(Versions, + [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) { + return V.Code == Version; + }); + MCContext &Ctx = getContext(); + const MCExpr *E; + if (I == Versions.end()) + E = MCConstantExpr::create(Version, Ctx); + else + E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx); + + if (W64) + E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx); + if (W32) + E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx); + if (MDP) + E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx); + + return MCOperand::createExpr(E); +} + bool AMDGPUDisassembler::isVI() const { return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); } @@ -1836,6 +1816,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const { return AMDGPU::isGFX11Plus(STI); } +bool AMDGPUDisassembler::isGFX12() const { + return STI.hasFeature(AMDGPU::FeatureGFX12); +} + bool AMDGPUDisassembler::isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI); } @@ -1851,6 +1835,29 @@ bool AMDGPUDisassembler::hasKernargPreload() const { //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// + +/// Print a string describing the reserved bit range specified by Mask with +/// offset BaseBytes for use in error comments. Mask is a single continuous +/// range of 1s surrounded by zeros. The format here is meant to align with the +/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html. +static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) { + SmallString<32> Result; + raw_svector_ostream S(Result); + + int TrailingZeros = llvm::countr_zero(Mask); + int PopCount = llvm::popcount(Mask); + + if (PopCount == 1) { + S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')'; + } else { + S << "bits in range (" + << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':' + << (TrailingZeros + BaseBytes * CHAR_BIT) << ')'; + } + + return Result; +} + #define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK)) #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ do { \ @@ -1862,8 +1869,26 @@ bool AMDGPUDisassembler::hasKernargPreload() const { << GET_FIELD(MASK) << '\n'; \ } while (0) +#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \ + do { \ + if (FourByteBuffer & (MASK)) { \ + return createStringError(std::errc::invalid_argument, \ + "kernel descriptor " DESC \ + " reserved %s set" MSG, \ + getBitRangeFromMask((MASK), 0).c_str()); \ + } \ + } while (0) + +#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "") +#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \ + CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG) +#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \ + CHECK_RESERVED_BITS_IMPL(MASK, DESC, "") +#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \ + CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG) + // NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( +Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { using namespace amdhsa; StringRef Indent = "\t"; @@ -1904,8 +1929,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( uint32_t GranulatedWavefrontSGPRCount = GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT); - if (isGFX10Plus() && GranulatedWavefrontSGPRCount) - return MCDisassembler::Fail; + if (isGFX10Plus()) + CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, + "must be zero on gfx10+"); uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); @@ -1916,8 +1942,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY); PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); @@ -1928,37 +1953,33 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV); if (!isGFX12Plus()) PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); - if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE); if (!isGFX12Plus()) PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); - if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY); + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER); if (isGFX9Plus()) PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); if (!isGFX9Plus()) - if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0) - return MCDisassembler::Fail; - if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0, + "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9"); + + CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1"); + if (!isGFX10Plus()) - if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2, + "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10"); if (isGFX10Plus()) { PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", @@ -1971,11 +1992,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling", COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN); - return MCDisassembler::Success; + return true; } // NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( +Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { using namespace amdhsa; StringRef Indent = "\t"; @@ -1996,14 +2017,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH); + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY); + CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE); PRINT_DIRECTIVE( ".amdhsa_exception_fp_ieee_invalid_op", @@ -2022,14 +2038,13 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); - if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2"); - return MCDisassembler::Success; + return true; } // NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( +Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { using namespace amdhsa; StringRef Indent = "\t"; @@ -2037,11 +2052,13 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( KdStream << Indent << ".amdhsa_accum_offset " << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4 << '\n'; - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0) - return MCDisassembler::Fail; + PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT); - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1) - return MCDisassembler::Fail; + + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0, + "COMPUTE_PGM_RSRC3", "must be zero on gfx90a"); + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1, + "COMPUTE_PGM_RSRC3", "must be zero on gfx90a"); } else if (isGFX10Plus()) { // Bits [0-3]. if (!isGFX12Plus()) { @@ -2054,8 +2071,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); } } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0, + "COMPUTE_PGM_RSRC3", + "must be zero on gfx12+"); } // Bits [4-11]. @@ -2070,46 +2088,76 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( PRINT_PSEUDO_DIRECTIVE_COMMENT( "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE); } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1, + "COMPUTE_PGM_RSRC3", + "must be zero on gfx10"); } // Bits [12]. - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2, + "COMPUTE_PGM_RSRC3", "must be zero on gfx10+"); // Bits [13]. if (isGFX12Plus()) { PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN", COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN); } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3, + "COMPUTE_PGM_RSRC3", + "must be zero on gfx10 or gfx11"); } // Bits [14-30]. - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4, + "COMPUTE_PGM_RSRC3", "must be zero on gfx10+"); // Bits [31]. if (isGFX11Plus()) { PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP); } else { - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5) - return MCDisassembler::Fail; + CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5, + "COMPUTE_PGM_RSRC3", + "must be zero on gfx10"); } } else if (FourByteBuffer) { - return MCDisassembler::Fail; + return createStringError( + std::errc::invalid_argument, + "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9"); } - return MCDisassembler::Success; + return true; } #undef PRINT_PSEUDO_DIRECTIVE_COMMENT #undef PRINT_DIRECTIVE #undef GET_FIELD - -MCDisassembler::DecodeStatus -AMDGPUDisassembler::decodeKernelDescriptorDirective( +#undef CHECK_RESERVED_BITS_IMPL +#undef CHECK_RESERVED_BITS +#undef CHECK_RESERVED_BITS_MSG +#undef CHECK_RESERVED_BITS_DESC +#undef CHECK_RESERVED_BITS_DESC_MSG + +/// Create an error object to return from onSymbolStart for reserved kernel +/// descriptor bits being set. +static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, + const char *Msg = "") { + return createStringError( + std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s", + getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg); +} + +/// Create an error object to return from onSymbolStart for reserved kernel +/// descriptor bytes being set. +static Error createReservedKDBytesError(unsigned BaseInBytes, + unsigned WidthInBytes) { + // Create an error comment in the same format as the "Kernel Descriptor" + // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor . + return createStringError( + std::errc::invalid_argument, + "kernel descriptor reserved bits in range (%u:%u) set", + (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT); +} + +Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective( DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes, raw_string_ostream &KdStream) const { #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ @@ -2132,46 +2180,44 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( FourByteBuffer = DE.getU32(Cursor); KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer << '\n'; - return MCDisassembler::Success; + return true; case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: FourByteBuffer = DE.getU32(Cursor); KdStream << Indent << ".amdhsa_private_segment_fixed_size " << FourByteBuffer << '\n'; - return MCDisassembler::Success; + return true; case amdhsa::KERNARG_SIZE_OFFSET: FourByteBuffer = DE.getU32(Cursor); KdStream << Indent << ".amdhsa_kernarg_size " << FourByteBuffer << '\n'; - return MCDisassembler::Success; + return true; case amdhsa::RESERVED0_OFFSET: // 4 reserved bytes, must be 0. ReservedBytes = DE.getBytes(Cursor, 4); for (int I = 0; I < 4; ++I) { - if (ReservedBytes[I] != 0) { - return MCDisassembler::Fail; - } + if (ReservedBytes[I] != 0) + return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4); } - return MCDisassembler::Success; + return true; case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: // KERNEL_CODE_ENTRY_BYTE_OFFSET // So far no directive controls this for Code Object V3, so simply skip for // disassembly. DE.skip(Cursor, 8); - return MCDisassembler::Success; + return true; case amdhsa::RESERVED1_OFFSET: // 20 reserved bytes, must be 0. ReservedBytes = DE.getBytes(Cursor, 20); for (int I = 0; I < 20; ++I) { - if (ReservedBytes[I] != 0) { - return MCDisassembler::Fail; - } + if (ReservedBytes[I] != 0) + return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20); } - return MCDisassembler::Success; + return true; case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: FourByteBuffer = DE.getU32(Cursor); @@ -2207,26 +2253,31 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) - return MCDisassembler::Fail; + return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0, + amdhsa::KERNEL_CODE_PROPERTIES_OFFSET); // Reserved for GFX9 if (isGFX9() && (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { - return MCDisassembler::Fail; - } else if (isGFX10Plus()) { + return createReservedKDBitsError( + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9"); + } + if (isGFX10Plus()) { PRINT_DIRECTIVE(".amdhsa_wavefront_size32", KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - // FIXME: We should be looking at the ELF header ABI version for this. - if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5) + if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) - return MCDisassembler::Fail; + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) { + return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1, + amdhsa::KERNEL_CODE_PROPERTIES_OFFSET); + } - return MCDisassembler::Success; + return true; case amdhsa::KERNARG_PRELOAD_OFFSET: using namespace amdhsa; @@ -2240,29 +2291,31 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset", KERNARG_PRELOAD_SPEC_OFFSET); } - return MCDisassembler::Success; + return true; case amdhsa::RESERVED3_OFFSET: // 4 bytes from here are reserved, must be 0. ReservedBytes = DE.getBytes(Cursor, 4); for (int I = 0; I < 4; ++I) { if (ReservedBytes[I] != 0) - return MCDisassembler::Fail; + return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4); } - return MCDisassembler::Success; + return true; default: llvm_unreachable("Unhandled index. Case statements cover everything."); - return MCDisassembler::Fail; + return true; } #undef PRINT_DIRECTIVE } -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( +Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor( StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64 aligned. if (Bytes.size() != 64 || KdAddress % 64 != 0) - return MCDisassembler::Fail; + return createStringError(std::errc::invalid_argument, + "kernel descriptor must be 64-byte aligned"); // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10 // requires us to know the setting of .amdhsa_wavefront_size32 in order to @@ -2284,23 +2337,22 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( DataExtractor::Cursor C(0); while (C && C.tell() < Bytes.size()) { - MCDisassembler::DecodeStatus Status = - decodeKernelDescriptorDirective(C, Bytes, KdStream); + Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream); cantFail(C.takeError()); - if (Status == MCDisassembler::Fail) - return MCDisassembler::Fail; + if (!Res) + return Res; } KdStream << ".end_amdhsa_kernel\n"; outs() << KdStream.str(); - return MCDisassembler::Success; + return true; } -std::optional<MCDisassembler::DecodeStatus> -AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, - ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &CStream) const { +Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, + uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const { // Right now only kernel descriptor needs to be handled. // We ignore all other symbols for target specific handling. // TODO: @@ -2310,7 +2362,8 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, // amd_kernel_code_t for Code Object V2. if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { Size = 256; - return MCDisassembler::Fail; + return createStringError(std::errc::invalid_argument, + "code object v2 is not supported"); } // Code Object V3 kernel descriptors. @@ -2319,7 +2372,25 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, Size = 64; // Size = 64 regardless of success or failure. return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); } - return std::nullopt; + + return false; +} + +const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id, + int64_t Val) { + MCContext &Ctx = getContext(); + MCSymbol *Sym = Ctx.getOrCreateSymbol(Id); + // Note: only set value to Val on a new symbol in case an dissassembler + // has already been initialized in this context. + if (!Sym->isVariable()) { + Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); + } else { + int64_t Res = ~Val; + bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res); + if (!Valid || Res != Val) + Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id); + } + return MCSymbolRefExpr::create(Sym, Ctx); } //===----------------------------------------------------------------------===// |