about | summary | refs | log | tree | commit | diff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2023-12-18 20:30:12 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2024-04-19 21:23:40 +0000
commit: bdbe302c3396ceb4dd89d1214485439598f05368 (patch)
tree: ccf66c6349b23061ed5e9645c21f15fbe718da8b /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
parent: e7a1904fe1ced461b2a31f03b6592ae6564a243a (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 324
1 files changed, 273 insertions, 51 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1b05acd5c90a..ed2e7e4f189e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
int64_t Offset;
- if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
+ if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
+ Offset = SignExtend64<24>(Imm);
+ } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
Offset = Imm & 0xFFFFF;
- } else { // GFX9+ supports 21-bit signed offsets.
+ } else { // GFX9+ supports 21-bit signed offsets.
Offset = SignExtend64<21>(Imm);
}
return addOperand(Inst, MCOperand::createImm(Offset));
@@ -105,6 +107,13 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
return addOperand(Inst, DAsm->decodeBoolReg(Val));
}
+static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
+}
+
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
uint64_t /*Addr*/, \
@@ -200,10 +209,12 @@ DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)
DECODE_OPERAND_REG_7(SReg_32, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
@@ -238,6 +249,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
@@ -259,6 +271,62 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
+
+static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "10-bit encoding expected");
+ assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
+
+ bool IsHi = Imm & (1 << 9);
+ unsigned RegIdx = Imm & 0xff;
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus
+DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<8>(Imm) && "8-bit encoding expected");
+
+ bool IsHi = Imm & (1 << 7);
+ unsigned RegIdx = Imm & 0x7f;
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<9>(Imm) && "9-bit encoding expected");
+
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ bool IsVGPR = Imm & (1 << 8);
+ if (IsVGPR) {
+ bool IsHi = Imm & (1 << 7);
+ unsigned RegIdx = Imm & 0x7f;
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+ Imm & 0xFF, false, 16));
+}
+
+static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "10-bit encoding expected");
+
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ bool IsVGPR = Imm & (1 << 8);
+ if (IsVGPR) {
+ bool IsHi = Imm & (1 << 9);
+ unsigned RegIdx = Imm & 0xff;
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+ Imm & 0xFF, false, 16));
+}
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
uint64_t Addr,
@@ -321,6 +389,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
+static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ assert(Imm < (1 << 9) && "9-bit encoding");
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
+}
+
static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
const MCDisassembler *Decoder) {
@@ -371,18 +448,19 @@ DECODE_SDWA(VopcDst)
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
assert(Bytes.size() >= sizeof(T));
- const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
+ const auto Res =
+ support::endian::read<T, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(sizeof(T));
return Res;
}
static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
assert(Bytes.size() >= 12);
- uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
- Bytes.data());
+ uint64_t Lo =
+ support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(8);
- uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
- Bytes.data());
+ uint64_t Hi =
+ support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
Bytes = Bytes.slice(4);
return DecoderUInt128(Lo, Hi);
}
@@ -418,25 +496,48 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
+ Res =
+ tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
+ MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ Res =
+ tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+
+ const auto convertVOPDPP = [&]() {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
convertVOP3PDPPInst(MI);
- else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
convertVOPCDPPInst(MI); // Special VOP3 case
- else {
+ } else {
assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
convertVOP3DPPInst(MI); // Regular VOP3 case
}
+ };
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res) {
+ convertVOPDPP();
+ break;
+ }
+ Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res) {
+ convertVOPDPP();
break;
}
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
+
+ Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
+ if (Res)
+ break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -461,7 +562,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164,
+ DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+
+ Res = tryDecodeInst(DecoderTableDPP8GFX1264,
+ DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
@@ -469,7 +577,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
+ MI, QW, Address, CS);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
+ convertVOPCDPPInst(MI);
+ break;
+ }
+
+ Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
+ MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
@@ -530,9 +647,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+ Address, CS);
if (Res) break;
+ Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ Address, CS);
+ if (Res)
+ break;
+
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
@@ -560,7 +683,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+ Address, CS);
if (Res)
break;
@@ -640,6 +769,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = convertMIMGInst(MI);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
+ Res = convertMIMGInst(MI);
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
Res = convertEXPInst(MI);
@@ -679,7 +812,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
- if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
@@ -690,9 +823,13 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
// The MCInst has this field that is not directly encoded in the
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
@@ -840,6 +977,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+ auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst);
@@ -848,8 +986,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
- int RsrcIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+ int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -870,7 +1009,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
bool IsAtomic = (VDstIdx != -1);
- bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
+ bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
+ bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
bool IsNSA = false;
bool IsPartialNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -887,10 +1027,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
+ // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
+ // VIMAGE insts other than BVH never use vaddr4.
IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
- Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
if (!IsNSA) {
- if (AddrSize > 12)
+ if (!IsVSample && AddrSize > 12)
AddrSize = 16;
} else {
if (AddrSize > Info->VAddrDwords) {
@@ -1098,6 +1241,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
case AMDGPU::TTMP_64RegClassID:
shift = 1;
break;
+ case AMDGPU::SGPR_96RegClassID:
+ case AMDGPU::TTMP_96RegClassID:
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::TTMP_128RegClassID:
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
@@ -1132,6 +1277,13 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
return createRegOperand(SRegClassID, Val >> shift);
}
+MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
+ bool IsHi) const {
+ unsigned RCID =
+ IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
+ return createRegOperand(RCID, RegIdx);
+}
+
// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
@@ -1147,7 +1299,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
return MCOperand::createImm(Literal);
}
-MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
+MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
@@ -1157,9 +1309,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
Twine(Bytes.size()));
}
HasLiteral = true;
- Literal = eatBytes<uint32_t>(Bytes);
+ Literal = Literal64 = eatBytes<uint32_t>(Bytes);
+ if (ExtendFP64)
+ Literal64 <<= 32;
}
- return MCOperand::createImm(Literal);
+ return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
@@ -1376,7 +1530,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral,
- unsigned ImmWidth) const {
+ unsigned ImmWidth, bool IsFP) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1388,6 +1542,20 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
return createRegOperand(IsAGPR ? getAgprClassId(Width)
: getVgprClassId(Width), Val - VGPR_MIN);
}
+ return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
+ IsFP);
+}
+
+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
+ unsigned Val,
+ bool MandatoryLiteral,
+ unsigned ImmWidth,
+ bool IsFP) const {
+ // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
+ // decoded earlier.
+ assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
+ using namespace AMDGPU::EncValues;
+
if (Val <= SGPR_MAX) {
// "SGPR_MIN <= Val" is always true and causes compilation warning.
static_assert(SGPR_MIN == 0);
@@ -1410,7 +1578,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
// Keep a sentinel value for deferred setting
return MCOperand::createImm(LITERAL_CONST);
else
- return decodeLiteralConstant();
+ return decodeLiteralConstant(IsFP && ImmWidth == 64);
}
switch (Width) {
@@ -1590,6 +1758,10 @@ MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
: decodeSrcOp(OPW32, Val);
}
+MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
+ return decodeSrcOp(OPW32, Val);
+}
+
bool AMDGPUDisassembler::isVI() const {
return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
@@ -1616,11 +1788,18 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
return AMDGPU::isGFX11Plus(STI);
}
+bool AMDGPUDisassembler::isGFX12Plus() const {
+ return AMDGPU::isGFX12Plus(STI);
+}
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
+bool AMDGPUDisassembler::hasKernargPreload() const {
+ return AMDGPU::hasKernargPreload(STI);
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
@@ -1704,12 +1883,16 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ if (!isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ if (!isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_ieee_mode",
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
return MCDisassembler::Fail;
@@ -1717,17 +1900,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
return MCDisassembler::Fail;
- PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+ if (isGFX9Plus())
+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
- if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+ if (!isGFX9Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
+ return MCDisassembler::Fail;
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
return MCDisassembler::Fail;
+ if (!isGFX10Plus())
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
+ return MCDisassembler::Fail;
if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
- COMPUTE_PGM_RSRC1_WGP_MODE);
- PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
- PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
}
+
+ if (isGFX12Plus())
+ PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
+
return MCDisassembler::Success;
}
@@ -1807,16 +2002,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
- PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+ return MCDisassembler::Fail;
+ }
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
return MCDisassembler::Fail;
- PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+
+ if (isGFX11Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+ return MCDisassembler::Fail;
+ }
} else if (FourByteBuffer) {
return MCDisassembler::Fail;
}
@@ -1945,10 +2153,24 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;
- case amdhsa::RESERVED2_OFFSET:
- // 6 bytes from here are reserved, must be 0.
- ReservedBytes = DE.getBytes(Cursor, 6);
- for (int I = 0; I < 6; ++I) {
+ case amdhsa::KERNARG_PRELOAD_OFFSET:
+ using namespace amdhsa;
+ TwoByteBuffer = DE.getU16(Cursor);
+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
+ KERNARG_PRELOAD_SPEC_LENGTH);
+ }
+
+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
+ KERNARG_PRELOAD_SPEC_OFFSET);
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED3_OFFSET:
+ // 4 bytes from here are reserved, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 4);
+ for (int I = 0; I < 4; ++I) {
if (ReservedBytes[I] != 0)
return MCDisassembler::Fail;
}
@@ -1975,7 +2197,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
if (isGFX10Plus()) {
uint16_t KernelCodeProperties =
support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
- support::endianness::little);
+ llvm::endianness::little);
EnableWavefrontSize32 =
AMDHSA_BITS_GET(KernelCodeProperties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
@@ -2018,7 +2240,7 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
// Code Object V3 kernel descriptors.
StringRef Name = Symbol.Name;
- if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
+ if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
Size = 64; // Size = 64 regardless of success or failure.
return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
}