src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-12-18 20:30:12 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2024-04-19 21:23:40 +0000
commit	bdbe302c3396ceb4dd89d1214485439598f05368 (patch)
tree	ccf66c6349b23061ed5e9645c21f15fbe718da8b /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
parent	e7a1904fe1ced461b2a31f03b6592ae6564a243a (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

324

1 files changed, 273 insertions, 51 deletions

diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1b05acd5c90a..ed2e7e4f189e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

@@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,

const MCDisassembler *Decoder) {

auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

int64_t Offset;

- if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.

+ if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.

+ Offset = SignExtend64<24>(Imm);

+ } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.

Offset = Imm & 0xFFFFF;

- } else { // GFX9+ supports 21-bit signed offsets.

+ } else { // GFX9+ supports 21-bit signed offsets.

Offset = SignExtend64<21>(Imm);

}

return addOperand(Inst, MCOperand::createImm(Offset));

@@ -105,6 +107,13 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,

return addOperand(Inst, DAsm->decodeBoolReg(Val));

}

+static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,

+ uint64_t Addr,

+ const MCDisassembler *Decoder) {

+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \

static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \

uint64_t /*Addr*/, \

@@ -200,10 +209,12 @@ DECODE_OPERAND_REG_8(VReg_512)

DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)

+DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)

DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)

DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)

DECODE_OPERAND_REG_7(SReg_64, OPW64)

DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)

+DECODE_OPERAND_REG_7(SReg_96, OPW96)

DECODE_OPERAND_REG_7(SReg_128, OPW128)

DECODE_OPERAND_REG_7(SReg_256, OPW256)

DECODE_OPERAND_REG_7(SReg_512, OPW512)

@@ -238,6 +249,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)

DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)

DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)

+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)

DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)

DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)

DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)

@@ -259,6 +271,62 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)

DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)

DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)

DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)

+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)

+static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,

+ uint64_t /*Addr*/,

+ const MCDisassembler *Decoder) {

+ assert(isUInt<10>(Imm) && "10-bit encoding expected");

+ assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

+ bool IsHi = Imm & (1 << 9);

+ unsigned RegIdx = Imm & 0xff;

+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

+static DecodeStatus

+DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,

+ const MCDisassembler *Decoder) {

+ assert(isUInt<8>(Imm) && "8-bit encoding expected");

+ bool IsHi = Imm & (1 << 7);

+ unsigned RegIdx = Imm & 0x7f;

+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

+static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,

+ uint64_t /*Addr*/,

+ const MCDisassembler *Decoder) {

+ assert(isUInt<9>(Imm) && "9-bit encoding expected");

+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ bool IsVGPR = Imm & (1 << 8);

+ if (IsVGPR) {

+ bool IsHi = Imm & (1 << 7);

+ unsigned RegIdx = Imm & 0x7f;

+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

+ }

+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,

+ Imm & 0xFF, false, 16));

+static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,

+ uint64_t /*Addr*/,

+ const MCDisassembler *Decoder) {

+ assert(isUInt<10>(Imm) && "10-bit encoding expected");

+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ bool IsVGPR = Imm & (1 << 8);

+ if (IsVGPR) {

+ bool IsHi = Imm & (1 << 9);

+ unsigned RegIdx = Imm & 0xff;

+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

+ }

+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,

+ Imm & 0xFF, false, 16));

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,

uint64_t Addr,

@@ -321,6 +389,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,

return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));

}

+static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,

+ uint64_t Addr,

+ const MCDisassembler *Decoder) {

+ assert(Imm < (1 << 9) && "9-bit encoding");

+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

+ return addOperand(

+ Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));

static DecodeStatus

DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,

const MCDisassembler *Decoder) {

@@ -371,18 +448,19 @@ DECODE_SDWA(VopcDst)

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {

assert(Bytes.size() >= sizeof(T));

- const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());

+ const auto Res =

+ support::endian::read<T, llvm::endianness::little>(Bytes.data());

Bytes = Bytes.slice(sizeof(T));

return Res;

}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {

assert(Bytes.size() >= 12);

- uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(

- Bytes.data());

+ uint64_t Lo =

+ support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());

Bytes = Bytes.slice(8);

- uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(

- Bytes.data());

+ uint64_t Hi =

+ support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());

Bytes = Bytes.slice(4);

return DecoderUInt128(Lo, Hi);

}

@@ -418,25 +496,48 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

// encodings

if (isGFX11Plus() && Bytes.size() >= 12 ) {

DecoderUInt128 DecW = eat12Bytes(Bytes);

- Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);

+ Res =

+ tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,

+ MI, DecW, Address, CS);

if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)

break;

MI = MCInst(); // clear

- Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);

- if (Res) {

- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)

+ Res =

+ tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,

+ MI, DecW, Address, CS);

+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)

+ break;

+ MI = MCInst(); // clear

+ const auto convertVOPDPP = [&]() {

+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {

convertVOP3PDPPInst(MI);

- else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))

+ } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {

convertVOPCDPPInst(MI); // Special VOP3 case

- else {

+ } else {

assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);

convertVOP3DPPInst(MI); // Regular VOP3 case

}

+ };

+ Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,

+ MI, DecW, Address, CS);

+ if (Res) {

+ convertVOPDPP();

+ break;

+ }

+ Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,

+ MI, DecW, Address, CS);

+ if (Res) {

+ convertVOPDPP();

break;

}

Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);

if (Res)

break;

+ Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);

+ if (Res)

+ break;

}

// Reinitialize Bytes

Bytes = Bytes_.slice(0, MaxInstBytesNum);

@@ -461,7 +562,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

break;

MI = MCInst(); // clear

- Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);

+ Res = tryDecodeInst(DecoderTableDPP8GFX1164,

+ DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);

+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)

+ break;

+ MI = MCInst(); // clear

+ Res = tryDecodeInst(DecoderTableDPP8GFX1264,

+ DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);

if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)

break;

MI = MCInst(); // clear

@@ -469,7 +577,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);

if (Res) break;

- Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);

+ Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,

+ MI, QW, Address, CS);

+ if (Res) {

+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)

+ convertVOPCDPPInst(MI);

+ break;

+ }

+ Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,

+ MI, QW, Address, CS);

if (Res) {

if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)

convertVOPCDPPInst(MI);

@@ -530,9 +647,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);

if (Res) break;

- Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);

+ Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,

+ Address, CS);

if (Res) break;

+ Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,

+ Address, CS);

+ if (Res)

+ break;

if (Bytes.size() < 4) break;

const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

@@ -560,7 +683,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);

if (Res) break;

- Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);

+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,

+ Address, CS);

+ if (Res)

+ break;

+ Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,

+ Address, CS);

if (Res)

break;

@@ -640,6 +769,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

Res = convertMIMGInst(MI);

}

+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &

+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))

+ Res = convertMIMGInst(MI);

if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))

Res = convertEXPInst(MI);

@@ -679,7 +812,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

}

DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {

- if (STI.hasFeature(AMDGPU::FeatureGFX11)) {

+ if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {

// The MCInst still has these fields even though they are no longer encoded

// in the GFX11 instruction.

insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);

@@ -690,9 +823,13 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {

DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {

if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||

+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||

MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||

+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||

MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||

- MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {

+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||

+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||

+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {

// The MCInst has this field that is not directly encoded in the

// instruction.

insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);

@@ -840,6 +977,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {

// VADDR size. Consequently, decoded instructions always show address as if it

// has 1 dword, which could be not really so.

DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

+ auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),

AMDGPU::OpName::vdst);

@@ -848,8 +986,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

AMDGPU::OpName::vdata);

int VAddr0Idx =

AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

- int RsrcIdx =

- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);

+ int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc

+ : AMDGPU::OpName::rsrc;

+ int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);

int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),

AMDGPU::OpName::dmask);

@@ -870,7 +1009,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

}

bool IsAtomic = (VDstIdx != -1);

- bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

+ bool IsGather4 = TSFlags & SIInstrFlags::Gather4;

+ bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;

bool IsNSA = false;

bool IsPartialNSA = false;

unsigned AddrSize = Info->VAddrDwords;

@@ -887,10 +1027,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

AddrSize =

AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

+ // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.

+ // VIMAGE insts other than BVH never use vaddr4.

IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||

- Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;

+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||

+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;

if (!IsNSA) {

- if (AddrSize > 12)

+ if (!IsVSample && AddrSize > 12)

AddrSize = 16;

} else {

if (AddrSize > Info->VAddrDwords) {

@@ -1098,6 +1241,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,

case AMDGPU::TTMP_64RegClassID:

shift = 1;

break;

+ case AMDGPU::SGPR_96RegClassID:

+ case AMDGPU::TTMP_96RegClassID:

case AMDGPU::SGPR_128RegClassID:

case AMDGPU::TTMP_128RegClassID:

// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in

@@ -1132,6 +1277,13 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,

return createRegOperand(SRegClassID, Val >> shift);

}

+MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,

+ bool IsHi) const {

+ unsigned RCID =

+ IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;

+ return createRegOperand(RCID, RegIdx);

// Decode Literals for insts which always have a literal in the encoding

MCOperand

AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {

@@ -1147,7 +1299,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {

return MCOperand::createImm(Literal);

}

-MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {

+MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {

// For now all literal constants are supposed to be unsigned integer

// ToDo: deal with signed/unsigned 64-bit integer constants

// ToDo: deal with float/double constants

@@ -1157,9 +1309,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {

Twine(Bytes.size()));

}

HasLiteral = true;

- Literal = eatBytes<uint32_t>(Bytes);

+ Literal = Literal64 = eatBytes<uint32_t>(Bytes);

+ if (ExtendFP64)

+ Literal64 <<= 32;

}

- return MCOperand::createImm(Literal);

+ return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);

}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {

@@ -1376,7 +1530,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,

bool MandatoryLiteral,

- unsigned ImmWidth) const {

+ unsigned ImmWidth, bool IsFP) const {

using namespace AMDGPU::EncValues;

assert(Val < 1024); // enum10

@@ -1388,6 +1542,20 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,

return createRegOperand(IsAGPR ? getAgprClassId(Width)

: getVgprClassId(Width), Val - VGPR_MIN);

}

+ return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,

+ IsFP);

+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,

+ unsigned Val,

+ bool MandatoryLiteral,

+ unsigned ImmWidth,

+ bool IsFP) const {

+ // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been

+ // decoded earlier.

+ assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");

+ using namespace AMDGPU::EncValues;

if (Val <= SGPR_MAX) {

// "SGPR_MIN <= Val" is always true and causes compilation warning.

static_assert(SGPR_MIN == 0);

@@ -1410,7 +1578,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,

// Keep a sentinel value for deferred setting

return MCOperand::createImm(LITERAL_CONST);

else

- return decodeLiteralConstant();

+ return decodeLiteralConstant(IsFP && ImmWidth == 64);

}

switch (Width) {

@@ -1590,6 +1758,10 @@ MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {

: decodeSrcOp(OPW32, Val);

}

+MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {

+ return decodeSrcOp(OPW32, Val);

bool AMDGPUDisassembler::isVI() const {

return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);

}

@@ -1616,11 +1788,18 @@ bool AMDGPUDisassembler::isGFX11Plus() const {

return AMDGPU::isGFX11Plus(STI);

}

+bool AMDGPUDisassembler::isGFX12Plus() const {

+ return AMDGPU::isGFX12Plus(STI);

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {

return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);

}

+bool AMDGPUDisassembler::hasKernargPreload() const {

+ return AMDGPU::hasKernargPreload(STI);

//===----------------------------------------------------------------------===//

// AMDGPU specific symbol handling

//===----------------------------------------------------------------------===//

@@ -1704,12 +1883,16 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(

if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)

return MCDisassembler::Fail;

- PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

+ if (!isGFX12Plus())

+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp",

+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)

return MCDisassembler::Fail;

- PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

+ if (!isGFX12Plus())

+ PRINT_DIRECTIVE(".amdhsa_ieee_mode",

+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)

return MCDisassembler::Fail;

@@ -1717,17 +1900,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(

if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)

return MCDisassembler::Fail;

- PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

+ if (isGFX9Plus())

+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

- if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)

+ if (!isGFX9Plus())

+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)

+ return MCDisassembler::Fail;

+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)

return MCDisassembler::Fail;

+ if (!isGFX10Plus())

+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)

+ return MCDisassembler::Fail;

if (isGFX10Plus()) {

PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",

- COMPUTE_PGM_RSRC1_WGP_MODE);

- PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);

- PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);

+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);

+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);

+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

}

+ if (isGFX12Plus())

+ PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",

+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

return MCDisassembler::Success;

}

@@ -1807,16 +2002,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(

PRINT_PSEUDO_DIRECTIVE_COMMENT(

"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);

}

- PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",

- COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);

- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",

- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);

- PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",

- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);

- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)

+ if (isGFX11Plus()) {

+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",

+ COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);

+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",

+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);

+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",

+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);

+ } else {

+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)

+ return MCDisassembler::Fail;

+ }

+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)

return MCDisassembler::Fail;

- PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",

- COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);

+ if (isGFX11Plus()) {

+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",

+ COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);

+ } else {

+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)

+ return MCDisassembler::Fail;

+ }

} else if (FourByteBuffer) {

return MCDisassembler::Fail;

}

@@ -1945,10 +2153,24 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(

return MCDisassembler::Success;

- case amdhsa::RESERVED2_OFFSET:

- // 6 bytes from here are reserved, must be 0.

- ReservedBytes = DE.getBytes(Cursor, 6);

- for (int I = 0; I < 6; ++I) {

+ case amdhsa::KERNARG_PRELOAD_OFFSET:

+ using namespace amdhsa;

+ TwoByteBuffer = DE.getU16(Cursor);

+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {

+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",

+ KERNARG_PRELOAD_SPEC_LENGTH);

+ }

+ if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {

+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",

+ KERNARG_PRELOAD_SPEC_OFFSET);

+ }

+ return MCDisassembler::Success;

+ case amdhsa::RESERVED3_OFFSET:

+ // 4 bytes from here are reserved, must be 0.

+ ReservedBytes = DE.getBytes(Cursor, 4);

+ for (int I = 0; I < 4; ++I) {

if (ReservedBytes[I] != 0)

return MCDisassembler::Fail;

}

@@ -1975,7 +2197,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(

if (isGFX10Plus()) {

uint16_t KernelCodeProperties =

support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],

- support::endianness::little);

+ llvm::endianness::little);

EnableWavefrontSize32 =

AMDHSA_BITS_GET(KernelCodeProperties,

amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

@@ -2018,7 +2240,7 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,

// Code Object V3 kernel descriptors.

StringRef Name = Symbol.Name;

- if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {

+ if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {

Size = 64; // Size = 64 regardless of success or failure.

return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);

}