aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-09-02 21:17:18 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-01-07 23:04:38 +0000
commit0e1e0ce556810ad5f9d45485e686f0653530516c (patch)
treeab02ce7c4fafc0518430e9cec77d41201bce23f0 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
parentc3eb0b7c19221f3a2133ab14d3ffffa61ec0c4bc (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp875
1 files changed, 332 insertions, 543 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index c4e85210848a..1b05acd5c90a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,13 +45,11 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
- MCContext &Ctx,
- MCInstrInfo const *MCII) :
- MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
-
+ MCContext &Ctx, MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+ MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
+ if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
}
@@ -74,7 +72,7 @@ static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
return OpIdx;
}
-static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
+static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
@@ -115,183 +113,160 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
return addOperand(Inst, DAsm->DecoderName(Imm)); \
}
-#define DECODE_OPERAND_REG(RegClass) \
-DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
-
-DECODE_OPERAND_REG(VGPR_32)
-DECODE_OPERAND_REG(VGPR_32_Lo128)
-DECODE_OPERAND_REG(VRegOrLds_32)
-DECODE_OPERAND_REG(VS_32)
-DECODE_OPERAND_REG(VS_64)
-DECODE_OPERAND_REG(VS_128)
-
-DECODE_OPERAND_REG(VReg_64)
-DECODE_OPERAND_REG(VReg_96)
-DECODE_OPERAND_REG(VReg_128)
-DECODE_OPERAND_REG(VReg_256)
-DECODE_OPERAND_REG(VReg_288)
-DECODE_OPERAND_REG(VReg_352)
-DECODE_OPERAND_REG(VReg_384)
-DECODE_OPERAND_REG(VReg_512)
-DECODE_OPERAND_REG(VReg_1024)
-
-DECODE_OPERAND_REG(SReg_32)
-DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
-DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
-DECODE_OPERAND_REG(SRegOrLds_32)
-DECODE_OPERAND_REG(SReg_64)
-DECODE_OPERAND_REG(SReg_64_XEXEC)
-DECODE_OPERAND_REG(SReg_128)
-DECODE_OPERAND_REG(SReg_256)
-DECODE_OPERAND_REG(SReg_512)
-
-DECODE_OPERAND_REG(AGPR_32)
-DECODE_OPERAND_REG(AReg_64)
-DECODE_OPERAND_REG(AReg_128)
-DECODE_OPERAND_REG(AReg_256)
-DECODE_OPERAND_REG(AReg_512)
-DECODE_OPERAND_REG(AReg_1024)
-DECODE_OPERAND_REG(AV_32)
-DECODE_OPERAND_REG(AV_64)
-DECODE_OPERAND_REG(AV_128)
-DECODE_OPERAND_REG(AVDst_128)
-DECODE_OPERAND_REG(AVDst_512)
-
-static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
-}
-
-static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
-}
-
-static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
-}
-
-static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
-}
-
-static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
-}
-
-static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
-}
+// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
+// number of register. Used by VGPR only and AGPR only operands.
+#define DECODE_OPERAND_REG_8(RegClass) \
+ static DecodeStatus Decode##RegClass##RegisterClass( \
+ MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
+ const MCDisassembler *Decoder) { \
+ assert(Imm < (1 << 8) && "8-bit encoding"); \
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
+ return addOperand( \
+ Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
+ }
-static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
-}
+#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
+ ImmWidth) \
+ static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
+ const MCDisassembler *Decoder) { \
+ assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
+ return addOperand(Inst, \
+ DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
+ MandatoryLiteral, ImmWidth)); \
+ }
-static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
-}
+// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
+// get register class. Used by SGPR only operands.
+#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
+ DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
+
+// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
+// Imm{9} is acc (agpr or vgpr); Imm{8} should be 0 (see VOP3Pe_SMFMAC).
+// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
+// Used by AV_ register classes (AGPR or VGPR only register operands).
+#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \
+ DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \
+ Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
+
+// Decoder for Src(9-bit encoding) registers only.
+#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
+
+// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9 bits, set
+// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
+// only.
+#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
+
+// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
+// Imm{9} is acc, registers only.
+#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
+
+// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
+// register from RegClass or immediate. Registers that don't belong to RegClass
+// will be decoded and InstPrinter will report warning. Immediate will be
+// decoded into constant of size ImmWidth, should match width of immediate used
+// by OperandType (important for floating point types).
+#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \
+ false, ImmWidth)
+
+// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
+// and decode using 'enum10' from decodeSrcOp.
+#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \
+ Imm | 512, false, ImmWidth)
+
+#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \
+ OpWidth, Imm, true, ImmWidth)
+
+// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
+// when RegisterClass is used as an operand. Most often used for destination
+// operands.
-static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
+DECODE_OPERAND_REG_8(VGPR_32)
+DECODE_OPERAND_REG_8(VGPR_32_Lo128)
+DECODE_OPERAND_REG_8(VReg_64)
+DECODE_OPERAND_REG_8(VReg_96)
+DECODE_OPERAND_REG_8(VReg_128)
+DECODE_OPERAND_REG_8(VReg_256)
+DECODE_OPERAND_REG_8(VReg_288)
+DECODE_OPERAND_REG_8(VReg_352)
+DECODE_OPERAND_REG_8(VReg_384)
+DECODE_OPERAND_REG_8(VReg_512)
+DECODE_OPERAND_REG_8(VReg_1024)
+
+DECODE_OPERAND_REG_7(SReg_32, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
+DECODE_OPERAND_REG_7(SReg_64, OPW64)
+DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_128, OPW128)
+DECODE_OPERAND_REG_7(SReg_256, OPW256)
+DECODE_OPERAND_REG_7(SReg_512, OPW512)
+
+DECODE_OPERAND_REG_8(AGPR_32)
+DECODE_OPERAND_REG_8(AReg_64)
+DECODE_OPERAND_REG_8(AReg_128)
+DECODE_OPERAND_REG_8(AReg_256)
+DECODE_OPERAND_REG_8(AReg_512)
+DECODE_OPERAND_REG_8(AReg_1024)
+
+DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
+DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
+
+// Decoders for register only source RegisterOperands that use 9-bit Src
+// encoding: 'decodeOperand_<RegClass>'.
+
+DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
+DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
+DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
+DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
+DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
+
+DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
+
+DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
+DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
+DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
+
+// Decoders for register or immediate RegisterOperands that use 9-bit Src
+// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
+
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
+
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
+
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
+
+static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
-static DecodeStatus
-decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(
- Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
-}
-
-static DecodeStatus
-decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(
- Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
-}
-
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
@@ -381,13 +356,6 @@ DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
Decoder);
}
-static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
-}
-
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
@@ -436,7 +404,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
raw_ostream &CS) const {
- CommentStream = &CS;
bool IsSDWA = false;
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
@@ -451,13 +418,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW,
- Address);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW,
- Address);
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
@@ -469,7 +434,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
break;
}
- Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
}
@@ -479,8 +444,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
- Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
+ Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
if (Res) {
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
== -1)
@@ -491,37 +456,37 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
break;
}
- Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
- Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
+ Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
if (Res)
break;
}
@@ -529,8 +494,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
// v_mad_mixhi_f16 for FMA variants. Try to decode using this special
// table first so we print the correct name.
- if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
- Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
if (Res)
break;
}
@@ -542,64 +507,64 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try decode 32-bit instruction
if (Bytes.size() < 4) break;
const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
if (Res) break;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
- Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
if (Res)
break;
}
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
- Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
+ Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
if (Res) break;
}
- Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
if (Res) break;
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
+ Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
if (Res)
break;
}
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
if (Res)
break;
}
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
if (Res)
break;
- Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
} while (false);
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
@@ -627,7 +592,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
- (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
+ (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
// GFX90A lost TFE, its place is occupied by ACC.
int TFEOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
@@ -714,7 +679,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
@@ -736,12 +701,12 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
}
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) {
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
// VOPC - insert clamp
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
- } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
if (SDst != -1) {
// VOPC - insert VCC register as sdst
@@ -883,6 +848,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ int RsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -898,14 +865,14 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
assert(VDataIdx != -1);
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
- if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16))
- addOperand(MI, MCOperand::createImm(1));
+ addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
return MCDisassembler::Success;
}
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
bool IsNSA = false;
+ bool IsPartialNSA = false;
unsigned AddrSize = Info->VAddrDwords;
if (isGFX10Plus()) {
@@ -927,9 +894,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize = 16;
} else {
if (AddrSize > Info->VAddrDwords) {
- // The NSA encoding does not contain enough operands for the combination
- // of base opcode / dimension. Should this be an error?
- return MCDisassembler::Success;
+ if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
+ // The NSA encoding does not contain enough operands for the
+ // combination of base opcode / dimension. Should this be an error?
+ return MCDisassembler::Success;
+ }
+ IsPartialNSA = true;
}
}
}
@@ -972,17 +942,20 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
}
- // If not using NSA on GFX10+, widen address register to correct size.
- unsigned NewVAddr0 = AMDGPU::NoRegister;
- if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) {
- unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
- unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
- VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
-
- auto AddrRCID = MCII->get(NewOpcode).operands()[VAddr0Idx].RegClass;
- NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
+ // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
+ // If using partial NSA on GFX11+ widen last address register.
+ int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
+ unsigned NewVAddrSA = AMDGPU::NoRegister;
+ if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
+ AddrSize != Info->VAddrDwords) {
+ unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
+ unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
+ VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
+
+ auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
+ NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
&MRI.getRegClass(AddrRCID));
- if (NewVAddr0 == AMDGPU::NoRegister)
+ if (!NewVAddrSA)
return MCDisassembler::Success;
}
@@ -997,8 +970,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
}
- if (NewVAddr0 != AMDGPU::NoRegister) {
- MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
+ if (NewVAddrSA) {
+ MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
} else if (IsNSA) {
assert(AddrSize <= Info->VAddrDwords);
MI.erase(MI.begin() + VAddr0Idx + AddrSize,
@@ -1159,214 +1132,6 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
return createRegOperand(SRegClassID, Val >> shift);
}
-MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
- return decodeSrcOp(OPW16, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
- return decodeSrcOp(OPWV216, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
- return decodeSrcOp(OPWV232, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_Lo128(unsigned Val) const {
- return createRegOperand(AMDGPU::VGPR_32_Lo128RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
- // Some instructions have operand restrictions beyond what the encoding
- // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
- // high bit.
- Val &= 255;
-
- return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
- return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255);
-}
-
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
- using namespace AMDGPU::EncValues;
- assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
- return decodeSrcOp(OPW128, Val | IS_VGPR);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
- using namespace AMDGPU::EncValues;
- assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
- return decodeSrcOp(OPW512, Val | IS_VGPR);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_288RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_320RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_352RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_384RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
- // table-gen generated disassembler doesn't care about operand types
- // leaving only registry class so SSrc_32 operand turns into SReg_32
- // and therefore we accept immediates and literals here as well
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
- unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
- return decodeOperand_SReg_32(Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
- unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without EXEC_HI
- return decodeOperand_SReg_32(Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
- // table-gen generated disassembler doesn't care about operand types
- // leaving only registry class so SSrc_32 operand turns into SReg_32
- // and therefore we accept immediates and literals here as well
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
- return decodeDstOp(OPW256, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const {
- return decodeDstOp(OPW288, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const {
- return decodeDstOp(OPW320, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const {
- return decodeDstOp(OPW352, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const {
- return decodeDstOp(OPW384, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
- return decodeDstOp(OPW512, Val);
-}
-
// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
@@ -1410,21 +1175,21 @@ MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
static int64_t getInlineImmVal32(unsigned Imm) {
switch (Imm) {
case 240:
- return FloatToBits(0.5f);
+ return llvm::bit_cast<uint32_t>(0.5f);
case 241:
- return FloatToBits(-0.5f);
+ return llvm::bit_cast<uint32_t>(-0.5f);
case 242:
- return FloatToBits(1.0f);
+ return llvm::bit_cast<uint32_t>(1.0f);
case 243:
- return FloatToBits(-1.0f);
+ return llvm::bit_cast<uint32_t>(-1.0f);
case 244:
- return FloatToBits(2.0f);
+ return llvm::bit_cast<uint32_t>(2.0f);
case 245:
- return FloatToBits(-2.0f);
+ return llvm::bit_cast<uint32_t>(-2.0f);
case 246:
- return FloatToBits(4.0f);
+ return llvm::bit_cast<uint32_t>(4.0f);
case 247:
- return FloatToBits(-4.0f);
+ return llvm::bit_cast<uint32_t>(-4.0f);
case 248: // 1 / (2 * PI)
return 0x3e22f983;
default:
@@ -1435,21 +1200,21 @@ static int64_t getInlineImmVal32(unsigned Imm) {
static int64_t getInlineImmVal64(unsigned Imm) {
switch (Imm) {
case 240:
- return DoubleToBits(0.5);
+ return llvm::bit_cast<uint64_t>(0.5);
case 241:
- return DoubleToBits(-0.5);
+ return llvm::bit_cast<uint64_t>(-0.5);
case 242:
- return DoubleToBits(1.0);
+ return llvm::bit_cast<uint64_t>(1.0);
case 243:
- return DoubleToBits(-1.0);
+ return llvm::bit_cast<uint64_t>(-1.0);
case 244:
- return DoubleToBits(2.0);
+ return llvm::bit_cast<uint64_t>(2.0);
case 245:
- return DoubleToBits(-2.0);
+ return llvm::bit_cast<uint64_t>(-2.0);
case 246:
- return DoubleToBits(4.0);
+ return llvm::bit_cast<uint64_t>(4.0);
case 247:
- return DoubleToBits(-4.0);
+ return llvm::bit_cast<uint64_t>(-4.0);
case 248: // 1 / (2 * PI)
return 0x3fc45f306dc9c882;
default:
@@ -1482,23 +1247,21 @@ static int64_t getInlineImmVal16(unsigned Imm) {
}
}
-MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
+MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
&& Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
- switch (Width) {
- case OPW32:
- case OPW128: // splat constants
- case OPW512:
- case OPW1024:
- case OPWV232:
+ // ImmWidth 0 is a default case where operand should not allow immediates.
+ // Imm value is still decoded into 32 bit immediate operand, inst printer will
+ // use it to print verbose error message.
+ switch (ImmWidth) {
+ case 0:
+ case 32:
return MCOperand::createImm(getInlineImmVal32(Imm));
- case OPW64:
- case OPW256:
+ case 64:
return MCOperand::createImm(getInlineImmVal64(Imm));
- case OPW16:
- case OPWV216:
+ case 16:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@@ -1612,7 +1375,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
- bool MandatoryLiteral) const {
+ bool MandatoryLiteral,
+ unsigned ImmWidth) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1639,7 +1403,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
return decodeIntImmed(Val);
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Width, Val);
+ return decodeFPImmed(ImmWidth, Val);
if (Val == LITERAL_CONST) {
if (MandatoryLiteral)
@@ -1662,26 +1426,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
}
}
-MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
- using namespace AMDGPU::EncValues;
-
- assert(Val < 128);
- assert(Width == OPW256 || Width == OPW512);
-
- if (Val <= SGPR_MAX) {
- // "SGPR_MIN <= Val" is always true and causes compilation warning.
- static_assert(SGPR_MIN == 0);
- return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
- }
-
- int TTmpIdx = getTTmpIdx(Val);
- if (TTmpIdx >= 0) {
- return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
- }
-
- llvm_unreachable("unknown dst register");
-}
-
// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
@@ -1764,12 +1508,13 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
- const unsigned Val) const {
+ const unsigned Val,
+ unsigned ImmWidth) const {
using namespace AMDGPU::SDWA;
using namespace AMDGPU::EncValues;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) {
// XXX: cast to int is needed to avoid stupid warning:
// compare with unsigned is always true
if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
@@ -1795,31 +1540,31 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
return decodeIntImmed(SVal);
if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Width, SVal);
+ return decodeFPImmed(ImmWidth, SVal);
return decodeSpecialReg32(SVal);
- } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
return createRegOperand(getVgprClassId(Width), Val);
}
llvm_unreachable("unsupported target");
}
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
- return decodeSDWASrc(OPW16, Val);
+ return decodeSDWASrc(OPW16, Val, 16);
}
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
- return decodeSDWASrc(OPW32, Val);
+ return decodeSDWASrc(OPW32, Val, 32);
}
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
using namespace AMDGPU::SDWA;
- assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
+ assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) &&
"SDWAVopcDst should be present only on GFX9+");
- bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
+ bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
@@ -1840,18 +1585,19 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
- return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
- decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
+ return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
+ ? decodeSrcOp(OPW64, Val)
+ : decodeSrcOp(OPW32, Val);
}
bool AMDGPUDisassembler::isVI() const {
- return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+ return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
bool AMDGPUDisassembler::isGFX90A() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
+ return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
@@ -1863,7 +1609,7 @@ bool AMDGPUDisassembler::isGFX10Plus() const {
}
bool AMDGPUDisassembler::isGFX11() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
+ return STI.hasFeature(AMDGPU::FeatureGFX11);
}
bool AMDGPUDisassembler::isGFX11Plus() const {
@@ -1872,16 +1618,21 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
- return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+ return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
+#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
do { \
- KdStream << Indent << DIRECTIVE " " \
- << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
+ } while (0)
+#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
+ << GET_FIELD(MASK) << '\n'; \
} while (0)
// NOLINTNEXTLINE(readability-identifier-naming)
@@ -1896,11 +1647,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// simply calculate the inverse of what the assembler does.
uint32_t GranulatedWorkitemVGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
- uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
- AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
+ uint32_t NextFreeVGPR =
+ (GranulatedWorkitemVGPRCount + 1) *
+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
@@ -1924,8 +1675,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// The disassembler cannot recover the original values of those 3 directives.
uint32_t GranulatedWavefrontSGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
return MCDisassembler::Fail;
@@ -2035,7 +1785,46 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
return MCDisassembler::Success;
}
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
+    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+  using namespace amdhsa;
+  StringRef Indent = "\t";
+  if (isGFX90A()) { // GFX90A layout: ACCUM_OFFSET, TG_SPLIT, reserved bits.
+    KdStream << Indent << ".amdhsa_accum_offset "
+             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
+             << '\n'; // Printed value is (field + 1) * 4.
+    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
+      return MCDisassembler::Fail; // Set reserved bits are rejected.
+    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
+      return MCDisassembler::Fail;
+  } else if (isGFX10Plus()) {
+    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { // Unset or false.
+      PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
+                      COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+    } else { // Wave32 enabled: emit the field as a comment line only.
+      PRINT_PSEUDO_DIRECTIVE_COMMENT(
+          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+    }
+    PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
+    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
+    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+      return MCDisassembler::Fail;
+    PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", // Own mask, not TRAP_ON_START.
+                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_IMAGE_OP);
+  } else if (FourByteBuffer) { // Other targets: any set bit is rejected.
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
+#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
+#undef GET_FIELD
MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
@@ -2103,30 +1892,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;
case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
- // COMPUTE_PGM_RSRC3
- // - Only set for GFX10, GFX6-9 have this to be 0.
- // - Currently no directives directly control this.
FourByteBuffer = DE.getU32(Cursor);
- if (!isGFX10Plus() && FourByteBuffer) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
using namespace amdhsa;
@@ -2161,7 +1936,7 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
}
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
+ if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
@@ -2192,6 +1967,20 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
if (Bytes.size() != 64 || KdAddress % 64 != 0)
return MCDisassembler::Fail;
+ // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
+ // requires us to know the setting of .amdhsa_wavefront_size32 in order to
+ // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
+ // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
+ // when required.
+ if (isGFX10Plus()) {
+ uint16_t KernelCodeProperties =
+ support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
+ support::endianness::little);
+ EnableWavefrontSize32 =
+ AMDHSA_BITS_GET(KernelCodeProperties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ }
+
std::string Kd;
raw_string_ostream KdStream(Kd);
KdStream << ".amdhsa_kernel " << KdName << '\n';