aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2024-07-27 23:34:35 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-12-01 12:32:12 +0000
commitfbc266465ed3585efdbd8e9ebf71e97ce7e8b464 (patch)
tree7560c2cbec09e542e5f2e2100ffc16ca742b1075 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
parentac8517f04c0fe31968ed43e36608ad02d72d3597 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp1131
1 files changed, 601 insertions, 530 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a9968cfe25b4..1a0dc7098347 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -21,6 +21,7 @@
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -44,13 +45,41 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+ !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
+ // If there is no default wave size it must be a generation before gfx10,
+ // these have FeatureWavefrontSize64 in their definition already. For gfx10+
+ // set wave32 as a default.
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ return STICopy;
+ }
+
+ return STI;
+}
+
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
MCContext &Ctx, MCInstrInfo const *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
+ : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
+ MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
+ TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+ CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
+
+ for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
+ createConstantSymbolExpr(Symbol, Code);
+
+ UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
+ UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
+ UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
+}
+
+void AMDGPUDisassembler::setABIVersion(unsigned Version) {
+ CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}
inline static MCDisassembler::DecodeStatus
@@ -114,6 +143,12 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}
+static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeDpp8FI(Val));
+}
+
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
uint64_t /*Addr*/, \
@@ -145,6 +180,18 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
MandatoryLiteral, ImmWidth)); \
}
+static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
+ AMDGPUDisassembler::OpWidthTy OpWidth,
+ unsigned Imm, unsigned EncImm,
+ bool MandatoryLiteral, unsigned ImmWidth,
+ AMDGPU::OperandSemantics Sema,
+ const MCDisassembler *Decoder) {
+ assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
+ ImmWidth, Sema));
+}
+
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
@@ -154,46 +201,75 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
-#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \
- DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \
- Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
+template <AMDGPUDisassembler::OpWidthTy OpWidth>
+static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
+ false, 0, AMDGPU::OperandSemantics::INT, Decoder);
+}
// Decoder for Src(9-bit encoding) registers only.
-#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
+template <AMDGPUDisassembler::OpWidthTy OpWidth>
+static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
+ AMDGPU::OperandSemantics::INT, Decoder);
+}
// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
// only.
-#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
+template <AMDGPUDisassembler::OpWidthTy OpWidth>
+static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
+ AMDGPU::OperandSemantics::INT, Decoder);
+}
// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
// Imm{9} is acc, registers only.
-#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
+template <AMDGPUDisassembler::OpWidthTy OpWidth>
+static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
+ AMDGPU::OperandSemantics::INT, Decoder);
+}
// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
// register from RegClass or immediate. Registers that don't belong to RegClass
// will be decoded and InstPrinter will report warning. Immediate will be
// decoded into constant of size ImmWidth, should match width of immediate used
// by OperandType (important for floating point types).
-#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \
- false, ImmWidth)
-
-#define DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(Name, OpWidth, ImmWidth) \
- DECODE_SrcOp(decodeOperand_##Name, 9, OpWidth, Imm, false, ImmWidth)
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
+static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
+}
// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
-#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \
- Imm | 512, false, ImmWidth)
-
-#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \
- DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \
- OpWidth, Imm, true, ImmWidth)
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
+static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
+}
+
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
+static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
+}
// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
@@ -229,60 +305,6 @@ DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)
-DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
-DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
-
-// Decoders for register only source RegisterOperands that use use 9-bit Src
-// encoding: 'decodeOperand_<RegClass>'.
-
-DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
-DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
-DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
-DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
-DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
-
-DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
-
-DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
-DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
-DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
-
-// Decoders for register or immediate RegisterOperands that use 9-bit Src
-// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
-
-DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
-
-DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2I16, OPW32, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2F16, OPW32, 16)
-
-DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
-DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
-
-DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
-DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
-DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
-
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
@@ -365,9 +387,9 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}
-static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
- AMDGPUDisassembler::OpWidthTy Opw,
- const MCDisassembler *Decoder) {
+static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
+ AMDGPUDisassembler::OpWidthTy Opw,
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
if (!DAsm->isGFX90A()) {
Imm &= 511;
@@ -399,48 +421,21 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
+template <AMDGPUDisassembler::OpWidthTy Opw>
+static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ return decodeAVLdSt(Inst, Imm, Opw, Decoder);
+}
+
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
assert(Imm < (1 << 9) && "9-bit encoding");
auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(
- Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
-}
-
-static DecodeStatus
-DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- return decodeOperand_AVLdSt_Any(Inst, Imm,
- AMDGPUDisassembler::OPW32, Decoder);
-}
-
-static DecodeStatus
-DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- return decodeOperand_AVLdSt_Any(Inst, Imm,
- AMDGPUDisassembler::OPW64, Decoder);
-}
-
-static DecodeStatus
-DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- return decodeOperand_AVLdSt_Any(Inst, Imm,
- AMDGPUDisassembler::OPW96, Decoder);
-}
-
-static DecodeStatus
-DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- return decodeOperand_AVLdSt_Any(Inst, Imm,
- AMDGPUDisassembler::OPW128, Decoder);
-}
-
-static DecodeStatus
-DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
- Decoder);
+ return addOperand(Inst,
+ DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
+ AMDGPU::OperandSemantics::FP64));
}
#define DECODE_SDWA(DecName) \
@@ -450,6 +445,13 @@ DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
+static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
+ uint64_t /* Addr */,
+ const MCDisassembler *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeVersionImm(Imm));
+}
+
#include "AMDGPUGenDisassemblerTables.inc"
//===----------------------------------------------------------------------===//
@@ -475,29 +477,17 @@ static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
return DecoderUInt128(Lo, Hi);
}
-// The disassembler is greedy, so we need to check FI operand value to
-// not parse a dpp if the correct literal is not set. For dpp16 the
-// autogenerated decoder checks the dpp literal
-static bool isValidDPP8(const MCInst &MI) {
- using namespace llvm::AMDGPU::DPP;
- int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
- assert(FiIdx != -1);
- if ((unsigned)FiIdx >= MI.getNumOperands())
- return false;
- unsigned Fi = MI.getOperand(FiIdx).getImm();
- return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
-}
-
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
raw_ostream &CS) const {
- bool IsSDWA = false;
-
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
- DecodeStatus Res = MCDisassembler::Fail;
+ // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
+ // there are fewer bytes left). This will be overridden on success.
+ Size = std::min((size_t)4, Bytes_.size());
+
do {
// ToDo: better to switch encoding length using some bit predicate
// but it is unknown yet, so try all we can
@@ -506,234 +496,158 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res =
- tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
- MI, DecW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
- Res =
- tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
- MI, DecW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
-
- const auto convertVOPDPP = [&]() {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
- convertVOP3PDPPInst(MI);
- } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
- convertVOPCDPPInst(MI); // Special VOP3 case
- } else {
- assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
- convertVOP3DPPInst(MI); // Regular VOP3 case
- }
- };
- Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
- MI, DecW, Address, CS);
- if (Res) {
- convertVOPDPP();
- break;
- }
- Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
- MI, DecW, Address, CS);
- if (Res) {
- convertVOPDPP();
- break;
- }
- Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
- if (Res)
+
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
+ DecW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
- if (Res)
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
+ DecW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS);
- if (Res)
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
+
+ // Reinitialize Bytes
+ Bytes = Bytes_.slice(0, MaxInstBytesNum);
}
- // Reinitialize Bytes
- Bytes = Bytes_.slice(0, MaxInstBytesNum);
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
- if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
- Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
- if (Res) {
- if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
- == -1)
- break;
- if (convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
- }
- }
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
+ tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
+ tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
break;
- MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164,
- DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
+ // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
+ // table first so we print the correct name.
+ if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
+ tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
break;
- MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1264,
- DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
+ tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
break;
- MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
- if (Res) break;
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
+ tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
- MI, QW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
- convertVOPCDPPInst(MI);
+ if ((isVI() || isGFX9()) &&
+ tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
break;
- }
- Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
- MI, QW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
- convertVOPCDPPInst(MI);
+ if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
break;
- }
- Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
+ if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+ Address, CS))
+ break;
- if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
- Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
- if (Res)
- break;
- }
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
+ break;
- // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
- // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
- // table first so we print the correct name.
- if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
- Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
- if (Res)
- break;
- }
- }
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
+ break;
- // Reinitialize Bytes as DPP64 could have eaten too much
- Bytes = Bytes_.slice(0, MaxInstBytesNum);
+ // Reinitialize Bytes
+ Bytes = Bytes_.slice(0, MaxInstBytesNum);
+ }
// Try decode 32-bit instruction
- if (Bytes.size() < 4) break;
- const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
- if (Res) break;
+ if (Bytes.size() >= 4) {
+ const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
- if (Res) break;
-
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
- if (Res)
+ if ((isVI() || isGFX9()) &&
+ tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
break;
- }
- if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
- Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
- if (Res) break;
- }
+ if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
- if (Res) break;
+ if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
- Address, CS);
- if (Res) break;
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
+ tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
- Address, CS);
- if (Res)
- break;
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
+ tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
+ break;
- if (Bytes.size() < 4) break;
- const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
+ if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+ break;
- if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
- if (Res)
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+ Address, CS))
break;
- }
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
- if (Res)
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ Address, CS))
break;
}
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
+ return MCDisassembler::Fail;
+ } while (false);
- Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
- if (Res)
- break;
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
+ if (isMacDPP(MI))
+ convertMacDPPInst(MI);
- Res = tryDecodeInst(DecoderTableWMMAGFX1264, MI, QW, Address, CS);
- } while (false);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ convertVOP3PDPPInst(MI);
+ else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
+ AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ convertVOPCDPPInst(MI); // Special VOP3 case
+ else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
+ -1)
+ convertDPP8Inst(MI);
+ else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ convertVOP3DPPInst(MI); // Regular VOP3 case
+ }
- if (Res && AMDGPU::isMAC(MI.getOpcode())) {
+ if (AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
- if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
- MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
+ if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolPos != -1) {
@@ -749,9 +663,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
- (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
+ if ((MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
+ (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
// GFX90A lost TFE, its place is occupied by ACC.
int TFEOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
@@ -762,8 +676,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
int SWZOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
if (SWZOpIdx != -1) {
@@ -773,7 +687,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
int RsrcIdx =
@@ -781,36 +695,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
if (VAddr0Idx >= 0 && NSAArgs > 0) {
unsigned NSAWords = (NSAArgs + 3) / 4;
- if (Bytes.size() < 4 * NSAWords) {
- Res = MCDisassembler::Fail;
- } else {
- for (unsigned i = 0; i < NSAArgs; ++i) {
- const unsigned VAddrIdx = VAddr0Idx + 1 + i;
- auto VAddrRCID =
- MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
- MI.insert(MI.begin() + VAddrIdx,
- createRegOperand(VAddrRCID, Bytes[i]));
- }
- Bytes = Bytes.slice(4 * NSAWords);
+ if (Bytes.size() < 4 * NSAWords)
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < NSAArgs; ++i) {
+ const unsigned VAddrIdx = VAddr0Idx + 1 + i;
+ auto VAddrRCID =
+ MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
+ MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
}
+ Bytes = Bytes.slice(4 * NSAWords);
}
- if (Res)
- Res = convertMIMGInst(MI);
+ convertMIMGInst(MI);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
- Res = convertMIMGInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
+ convertMIMGInst(MI);
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
- Res = convertEXPInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
+ convertEXPInst(MI);
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
- Res = convertVINTERPInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
+ convertVINTERPInst(MI);
- if (Res && IsSDWA)
- Res = convertSDWAInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
+ convertSDWAInst(MI);
int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst_in);
@@ -831,27 +741,23 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
int ImmLitIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
- if (Res && ImmLitIdx != -1 && !IsSOPK)
- Res = convertFMAanyK(MI, ImmLitIdx);
+ if (ImmLitIdx != -1 && !IsSOPK)
+ convertFMAanyK(MI, ImmLitIdx);
- // if the opcode was not recognized we'll assume a Size of 4 bytes
- // (unless there are fewer bytes left)
- Size = Res ? (MaxInstBytesNum - Bytes.size())
- : std::min((size_t)4, Bytes_.size());
- return Res;
+ Size = MaxInstBytesNum - Bytes.size();
+ return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
}
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
@@ -864,10 +770,9 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
}
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
STI.hasFeature(AMDGPU::FeatureGFX10)) {
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
@@ -884,7 +789,6 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
}
}
- return MCDisassembler::Success;
}
struct VOPModifiers {
@@ -924,6 +828,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
return Modifiers;
}
+// Instructions decode the op_sel/suffix bits into the src_modifier
+// operands. Copy those bits into the src operands for true16 VGPRs.
+void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
+ const unsigned Opc = MI.getOpcode();
+ const MCRegisterClass &ConversionRC =
+ MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
+ constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
+ {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
+ SISrcMods::OP_SEL_0},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
+ SISrcMods::OP_SEL_0},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
+ SISrcMods::OP_SEL_0},
+ {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
+ SISrcMods::DST_OP_SEL}}};
+ for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+ int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
+ if (OpIdx == -1 || OpModsIdx == -1)
+ continue;
+ MCOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg())
+ continue;
+ if (!ConversionRC.contains(Op.getReg()))
+ continue;
+ unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
+ const MCOperand &OpMods = MI.getOperand(OpModsIdx);
+ unsigned ModVal = OpMods.getImm();
+ if (ModVal & OpSelMask) { // isHi
+ unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
+ }
+ }
+}
+
// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
@@ -953,64 +892,43 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
AMDGPU::OpName::src2_modifiers);
}
-// We must check FI == literal to reject not genuine dpp8 insts, and we must
-// first add optional MI operands to check FI
-DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
+void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
- if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
- convertVOP3PDPPInst(MI);
- } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
- AMDGPU::isVOPC64DPP(Opc)) {
- convertVOPCDPPInst(MI);
- } else {
- if (isMacDPP(MI))
- convertMacDPPInst(MI);
-
- int VDstInIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
- if (VDstInIdx != -1)
- insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
- if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
- MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
- insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ convertTrue16OpSel(MI);
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
+ } else {
+ // Insert dummy unused src modifiers.
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
- unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
- auto Mods = collectVOPModifiers(MI);
- insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
- AMDGPU::OpName::op_sel);
- } else {
- // Insert dummy unused src modifiers.
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src0_modifiers);
-
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src1_modifiers);
- }
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
}
- return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}
-DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
- if (isMacDPP(MI))
- convertMacDPPInst(MI);
+void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
+ convertTrue16OpSel(MI);
int VDstInIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
if (VDstInIdx != -1)
insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
- if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
- MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
- insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
-
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
@@ -1019,13 +937,12 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
AMDGPU::OpName::op_sel);
}
- return MCDisassembler::Success;
}
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
-DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -1035,8 +952,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
- int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
- : AMDGPU::OpName::rsrc;
+ int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -1054,7 +971,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
- return MCDisassembler::Success;
+ return;
}
bool IsAtomic = (VDstIdx != -1);
@@ -1089,7 +1006,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
// The NSA encoding does not contain enough operands for the
// combination of base opcode / dimension. Should this be an error?
- return MCDisassembler::Success;
+ return;
}
IsPartialNSA = true;
}
@@ -1108,12 +1025,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DstSize += 1;
if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
- return MCDisassembler::Success;
+ return;
int NewOpcode =
AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
if (NewOpcode == -1)
- return MCDisassembler::Success;
+ return;
// Widen the register to the correct number of enabled channels.
unsigned NewVdata = AMDGPU::NoRegister;
@@ -1130,7 +1047,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (NewVdata == AMDGPU::NoRegister) {
// It's possible to encode this such that the low register + enabled
// components exceeds the register count.
- return MCDisassembler::Success;
+ return;
}
}
@@ -1148,7 +1065,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
&MRI.getRegClass(AddrRCID));
if (!NewVAddrSA)
- return MCDisassembler::Success;
+ return;
}
MI.setOpcode(NewOpcode);
@@ -1169,14 +1086,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
MI.erase(MI.begin() + VAddr0Idx + AddrSize,
MI.begin() + VAddr0Idx + Info->VAddrDwords);
}
-
- return MCDisassembler::Success;
}
// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
// decoder only adds to src_modifiers, so manually add the bits to the other
// operands.
-DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
auto Mods = collectVOPModifiers(MI, true);
@@ -1201,12 +1116,10 @@ DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
AMDGPU::OpName::neg_hi);
-
- return MCDisassembler::Success;
}
// Create dummy old operand and insert optional operands
-DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
@@ -1223,11 +1136,9 @@ DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src1_modifiers);
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
- int ImmLitIdx) const {
+void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
@@ -1243,7 +1154,6 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
IsDeferredOp)
Op.setImm(Literal);
}
- return MCDisassembler::Success;
}
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
@@ -1424,7 +1334,7 @@ static int64_t getInlineImmVal64(unsigned Imm) {
}
}
-static int64_t getInlineImmVal16(unsigned Imm) {
+static int64_t getInlineImmValF16(unsigned Imm) {
switch (Imm) {
case 240:
return 0x3800;
@@ -1449,9 +1359,40 @@ static int64_t getInlineImmVal16(unsigned Imm) {
}
}
-MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
- assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
- && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
+static int64_t getInlineImmValBF16(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return 0x3F00;
+ case 241:
+ return 0xBF00;
+ case 242:
+ return 0x3F80;
+ case 243:
+ return 0xBF80;
+ case 244:
+ return 0x4000;
+ case 245:
+ return 0xC000;
+ case 246:
+ return 0x4080;
+ case 247:
+ return 0xC080;
+ case 248: // 1 / (2 * PI)
+ return 0x3E22;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
+ return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
+ : getInlineImmValF16(Imm);
+}
+
+MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
+ AMDGPU::OperandSemantics Sema) {
+ assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
+ Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
// ImmWidth 0 is a default case where operand should not allow immediates.
@@ -1464,7 +1405,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
case 64:
return MCOperand::createImm(getInlineImmVal64(Imm));
case 16:
- return MCOperand::createImm(getInlineImmVal16(Imm));
+ return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
default:
llvm_unreachable("implement me");
}
@@ -1578,7 +1519,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral,
- unsigned ImmWidth, bool IsFP) const {
+ unsigned ImmWidth,
+ AMDGPU::OperandSemantics Sema) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1591,14 +1533,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
: getVgprClassId(Width), Val - VGPR_MIN);
}
return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
- IsFP);
+ Sema);
}
-MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
- unsigned Val,
- bool MandatoryLiteral,
- unsigned ImmWidth,
- bool IsFP) const {
+MCOperand
+AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral, unsigned ImmWidth,
+ AMDGPU::OperandSemantics Sema) const {
// Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
// decoded earlier.
assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
@@ -1619,14 +1560,13 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
return decodeIntImmed(Val);
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(ImmWidth, Val);
+ return decodeFPImmed(ImmWidth, Val, Sema);
if (Val == LITERAL_CONST) {
if (MandatoryLiteral)
// Keep a sentinel value for deferred setting
return MCOperand::createImm(LITERAL_CONST);
- else
- return decodeLiteralConstant(IsFP && ImmWidth == 64);
+ return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
}
switch (Width) {
@@ -1723,9 +1663,10 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
-MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
- const unsigned Val,
- unsigned ImmWidth) const {
+MCOperand
+AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
+ unsigned ImmWidth,
+ AMDGPU::OperandSemantics Sema) const {
using namespace AMDGPU::SDWA;
using namespace AMDGPU::EncValues;
@@ -1756,21 +1697,21 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
return decodeIntImmed(SVal);
if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(ImmWidth, SVal);
+ return decodeFPImmed(ImmWidth, SVal, Sema);
return decodeSpecialReg32(SVal);
- } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
- return createRegOperand(getVgprClassId(Width), Val);
}
+ if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
+ return createRegOperand(getVgprClassId(Width), Val);
llvm_unreachable("unsupported target");
}
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
- return decodeSDWASrc(OPW16, Val, 16);
+ return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
- return decodeSDWASrc(OPW32, Val, 32);
+ return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
@@ -1789,15 +1730,13 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
if (TTmpIdx >= 0) {
auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
return createSRegOperand(TTmpClsId, TTmpIdx);
- } else if (Val > SGPR_MAX) {
- return IsWave64 ? decodeSpecialReg64(Val)
- : decodeSpecialReg32(Val);
- } else {
- return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
}
- } else {
- return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
+ if (Val > SGPR_MAX) {
+ return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
+ }
+ return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
}
+ return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
}
MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
@@ -1810,6 +1749,47 @@ MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
return decodeSrcOp(OPW32, Val);
}
+MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
+ if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
+ return MCOperand();
+ return MCOperand::createImm(Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
+ using VersionField = AMDGPU::EncodingField<7, 0>;
+ using W64Bit = AMDGPU::EncodingBit<13>;
+ using W32Bit = AMDGPU::EncodingBit<14>;
+ using MDPBit = AMDGPU::EncodingBit<15>;
+ using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
+
+ auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
+
+ // Decode into a plain immediate if any unused bits are raised.
+ if (Encoding::encode(Version, W64, W32, MDP) != Imm)
+ return MCOperand::createImm(Imm);
+
+ const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
+ auto I = find_if(Versions,
+ [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
+ return V.Code == Version;
+ });
+ MCContext &Ctx = getContext();
+ const MCExpr *E;
+ if (I == Versions.end())
+ E = MCConstantExpr::create(Version, Ctx);
+ else
+ E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
+
+ if (W64)
+ E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
+ if (W32)
+ E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
+ if (MDP)
+ E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
+
+ return MCOperand::createExpr(E);
+}
+
bool AMDGPUDisassembler::isVI() const {
return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
@@ -1836,6 +1816,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
return AMDGPU::isGFX11Plus(STI);
}
+bool AMDGPUDisassembler::isGFX12() const {
+ return STI.hasFeature(AMDGPU::FeatureGFX12);
+}
+
bool AMDGPUDisassembler::isGFX12Plus() const {
return AMDGPU::isGFX12Plus(STI);
}
@@ -1851,6 +1835,29 @@ bool AMDGPUDisassembler::hasKernargPreload() const {
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
+
+/// Print a string describing the reserved bit range specified by Mask with
+/// offset BaseBytes for use in error comments. Mask is a single continuous
+/// range of 1s surrounded by zeros. The format here is meant to align with the
+/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
+static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
+ SmallString<32> Result;
+ raw_svector_ostream S(Result);
+
+ int TrailingZeros = llvm::countr_zero(Mask);
+ int PopCount = llvm::popcount(Mask);
+
+ if (PopCount == 1) {
+ S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
+ } else {
+ S << "bits in range ("
+ << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
+ << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
+ }
+
+ return Result;
+}
+
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
do { \
@@ -1862,8 +1869,26 @@ bool AMDGPUDisassembler::hasKernargPreload() const {
<< GET_FIELD(MASK) << '\n'; \
} while (0)
+#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
+ do { \
+ if (FourByteBuffer & (MASK)) { \
+ return createStringError(std::errc::invalid_argument, \
+ "kernel descriptor " DESC \
+ " reserved %s set" MSG, \
+ getBitRangeFromMask((MASK), 0).c_str()); \
+ } \
+ } while (0)
+
+#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
+#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
+ CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
+#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
+ CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
+#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
+ CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
+
// NOLINTNEXTLINE(readability-identifier-naming)
-MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
+Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
StringRef Indent = "\t";
@@ -1904,8 +1929,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
uint32_t GranulatedWavefrontSGPRCount =
GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
- if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
- return MCDisassembler::Fail;
+ if (isGFX10Plus())
+ CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
+ "must be zero on gfx10+");
uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
@@ -1916,8 +1942,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
@@ -1928,37 +1953,33 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
if (!isGFX12Plus())
PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
if (!isGFX12Plus())
PRINT_DIRECTIVE(".amdhsa_ieee_mode",
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
- return MCDisassembler::Fail;
-
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
if (isGFX9Plus())
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (!isGFX9Plus())
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
- return MCDisassembler::Fail;
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
+ "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
+
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
+
if (!isGFX10Plus())
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
+ "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
@@ -1971,11 +1992,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
- return MCDisassembler::Success;
+ return true;
}
// NOLINTNEXTLINE(readability-identifier-naming)
-MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
+Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
StringRef Indent = "\t";
@@ -1996,14 +2017,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
- if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
- return MCDisassembler::Fail;
-
- if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
- return MCDisassembler::Fail;
-
- if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
+ CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
PRINT_DIRECTIVE(
".amdhsa_exception_fp_ieee_invalid_op",
@@ -2022,14 +2038,13 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
- if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
- return MCDisassembler::Success;
+ return true;
}
// NOLINTNEXTLINE(readability-identifier-naming)
-MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
+Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
StringRef Indent = "\t";
@@ -2037,11 +2052,13 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
KdStream << Indent << ".amdhsa_accum_offset "
<< (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
<< '\n';
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
- return MCDisassembler::Fail;
+
PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
- return MCDisassembler::Fail;
+
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
+ "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
+ "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
} else if (isGFX10Plus()) {
// Bits [0-3].
if (!isGFX12Plus()) {
@@ -2054,8 +2071,9 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
}
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
+ "COMPUTE_PGM_RSRC3",
+ "must be zero on gfx12+");
}
// Bits [4-11].
@@ -2070,46 +2088,76 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
+ "COMPUTE_PGM_RSRC3",
+ "must be zero on gfx10");
}
// Bits [12].
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
+ "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
// Bits [13].
if (isGFX12Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
+ "COMPUTE_PGM_RSRC3",
+ "must be zero on gfx10 or gfx11");
}
// Bits [14-30].
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
+ "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
// Bits [31].
if (isGFX11Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
} else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
- return MCDisassembler::Fail;
+ CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
+ "COMPUTE_PGM_RSRC3",
+ "must be zero on gfx10");
}
} else if (FourByteBuffer) {
- return MCDisassembler::Fail;
+ return createStringError(
+ std::errc::invalid_argument,
+ "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
}
- return MCDisassembler::Success;
+ return true;
}
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
-
-MCDisassembler::DecodeStatus
-AMDGPUDisassembler::decodeKernelDescriptorDirective(
+#undef CHECK_RESERVED_BITS_IMPL
+#undef CHECK_RESERVED_BITS
+#undef CHECK_RESERVED_BITS_MSG
+#undef CHECK_RESERVED_BITS_DESC
+#undef CHECK_RESERVED_BITS_DESC_MSG
+
+/// Create an error object to return from onSymbolStart for reserved kernel
+/// descriptor bits being set.
+static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
+ const char *Msg = "") {
+ return createStringError(
+ std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
+ getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
+}
+
+/// Create an error object to return from onSymbolStart for reserved kernel
+/// descriptor bytes being set.
+static Error createReservedKDBytesError(unsigned BaseInBytes,
+ unsigned WidthInBytes) {
+ // Create an error comment in the same format as the "Kernel Descriptor"
+ // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
+ return createStringError(
+ std::errc::invalid_argument,
+ "kernel descriptor reserved bits in range (%u:%u) set",
+ (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
+}
+
+Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
raw_string_ostream &KdStream) const {
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
@@ -2132,46 +2180,44 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
FourByteBuffer = DE.getU32(Cursor);
KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
<< '\n';
- return MCDisassembler::Success;
+ return true;
case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
KdStream << Indent << ".amdhsa_private_segment_fixed_size "
<< FourByteBuffer << '\n';
- return MCDisassembler::Success;
+ return true;
case amdhsa::KERNARG_SIZE_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
KdStream << Indent << ".amdhsa_kernarg_size "
<< FourByteBuffer << '\n';
- return MCDisassembler::Success;
+ return true;
case amdhsa::RESERVED0_OFFSET:
// 4 reserved bytes, must be 0.
ReservedBytes = DE.getBytes(Cursor, 4);
for (int I = 0; I < 4; ++I) {
- if (ReservedBytes[I] != 0) {
- return MCDisassembler::Fail;
- }
+ if (ReservedBytes[I] != 0)
+ return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
}
- return MCDisassembler::Success;
+ return true;
case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
// KERNEL_CODE_ENTRY_BYTE_OFFSET
// So far no directive controls this for Code Object V3, so simply skip for
// disassembly.
DE.skip(Cursor, 8);
- return MCDisassembler::Success;
+ return true;
case amdhsa::RESERVED1_OFFSET:
// 20 reserved bytes, must be 0.
ReservedBytes = DE.getBytes(Cursor, 20);
for (int I = 0; I < 20; ++I) {
- if (ReservedBytes[I] != 0) {
- return MCDisassembler::Fail;
- }
+ if (ReservedBytes[I] != 0)
+ return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
}
- return MCDisassembler::Success;
+ return true;
case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
@@ -2207,26 +2253,31 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
- return MCDisassembler::Fail;
+ return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
+ amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
// Reserved for GFX9
if (isGFX9() &&
(TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
- return MCDisassembler::Fail;
- } else if (isGFX10Plus()) {
+ return createReservedKDBitsError(
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
+ }
+ if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
}
- // FIXME: We should be looking at the ELF header ABI version for this.
- if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
- if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
- return MCDisassembler::Fail;
+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
+ return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
+ amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
+ }
- return MCDisassembler::Success;
+ return true;
case amdhsa::KERNARG_PRELOAD_OFFSET:
using namespace amdhsa;
@@ -2240,29 +2291,31 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
KERNARG_PRELOAD_SPEC_OFFSET);
}
- return MCDisassembler::Success;
+ return true;
case amdhsa::RESERVED3_OFFSET:
// 4 bytes from here are reserved, must be 0.
ReservedBytes = DE.getBytes(Cursor, 4);
for (int I = 0; I < 4; ++I) {
if (ReservedBytes[I] != 0)
- return MCDisassembler::Fail;
+ return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
}
- return MCDisassembler::Success;
+ return true;
default:
llvm_unreachable("Unhandled index. Case statements cover everything.");
- return MCDisassembler::Fail;
+ return true;
}
#undef PRINT_DIRECTIVE
}
-MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
+Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
+
// CP microcode requires the kernel descriptor to be 64 aligned.
if (Bytes.size() != 64 || KdAddress % 64 != 0)
- return MCDisassembler::Fail;
+ return createStringError(std::errc::invalid_argument,
+ "kernel descriptor must be 64-byte aligned");
// FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
// requires us to know the setting of .amdhsa_wavefront_size32 in order to
@@ -2284,23 +2337,22 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
DataExtractor::Cursor C(0);
while (C && C.tell() < Bytes.size()) {
- MCDisassembler::DecodeStatus Status =
- decodeKernelDescriptorDirective(C, Bytes, KdStream);
+ Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
cantFail(C.takeError());
- if (Status == MCDisassembler::Fail)
- return MCDisassembler::Fail;
+ if (!Res)
+ return Res;
}
KdStream << ".end_amdhsa_kernel\n";
outs() << KdStream.str();
- return MCDisassembler::Success;
+ return true;
}
-std::optional<MCDisassembler::DecodeStatus>
-AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const {
+Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
+ uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address) const {
// Right now only kernel descriptor needs to be handled.
// We ignore all other symbols for target specific handling.
// TODO:
@@ -2310,7 +2362,8 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
// amd_kernel_code_t for Code Object V2.
if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
Size = 256;
- return MCDisassembler::Fail;
+ return createStringError(std::errc::invalid_argument,
+ "code object v2 is not supported");
}
// Code Object V3 kernel descriptors.
@@ -2319,7 +2372,25 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
Size = 64; // Size = 64 regardless of success or failure.
return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
}
- return std::nullopt;
+
+ return false;
+}
+
+const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
+ int64_t Val) {
+ MCContext &Ctx = getContext();
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
+ // Note: only set value to Val on a new symbol in case a disassembler
+ // has already been initialized in this context.
+ if (!Sym->isVariable()) {
+ Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
+ } else {
+ int64_t Res = ~Val;
+ bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
+ if (!Valid || Res != Val)
+ Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
+ }
+ return MCSymbolRefExpr::create(Sym, Ctx);
}
//===----------------------------------------------------------------------===//