diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 254 |
1 files changed, 210 insertions, 44 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1b05acd5c90a..1f11beb71101 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); int64_t Offset; - if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. + if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets. + Offset = SignExtend64<24>(Imm); + } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets. Offset = Imm & 0xFFFFF; - } else { // GFX9+ supports 21-bit signed offsets. + } else { // GFX9+ supports 21-bit signed offsets. Offset = SignExtend64<21>(Imm); } return addOperand(Inst, MCOperand::createImm(Offset)); @@ -238,6 +240,7 @@ DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128) DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64) DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16) @@ -259,6 +262,62 @@ DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16) DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32) + +static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<10>(Imm) && "10-bit encoding expected"); + assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used"); + + bool IsHi = Imm & (1 << 9); + unsigned RegIdx = Imm & 0xff; + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); +} + +static DecodeStatus +DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<8>(Imm) && "8-bit encoding expected"); + + bool IsHi = Imm & (1 << 7); + unsigned RegIdx = Imm & 0x7f; + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); +} + +static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<9>(Imm) && "9-bit encoding expected"); + + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + bool IsVGPR = Imm & (1 << 8); + if (IsVGPR) { + bool IsHi = Imm & (1 << 7); + unsigned RegIdx = Imm & 0x7f; + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); + } + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16, + Imm & 0xFF, false, 16)); +} + +static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, + uint64_t /*Addr*/, + const MCDisassembler *Decoder) { + assert(isUInt<10>(Imm) && "10-bit encoding expected"); + + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + bool IsVGPR = Imm & (1 << 8); + if (IsVGPR) { + bool IsHi = Imm & (1 << 9); + unsigned RegIdx = Imm & 0xff; + return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); + } + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16, + Imm & 0xFF, false, 16)); +} static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -321,6 +380,15 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm, return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); } +static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { + assert(Imm < (1 << 9) && "9-bit encoding"); + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true)); +} + static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { @@ -371,18 +439,19 @@ DECODE_SDWA(VopcDst) template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) { assert(Bytes.size() >= sizeof(T)); - const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data()); + const auto Res = + support::endian::read<T, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(sizeof(T)); return Res; } static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) { assert(Bytes.size() >= 12); - uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>( - Bytes.data()); + uint64_t Lo = + support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(8); - uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>( - Bytes.data()); + uint64_t Hi = + support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data()); Bytes = Bytes.slice(4); return DecoderUInt128(Lo, Hi); } @@ -418,11 +487,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // encodings if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); - Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS); + Res = + tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696, + MI, DecW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS); + Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696, + MI, DecW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) convertVOP3PDPPInst(MI); @@ -437,6 +509,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS); if (Res) break; + + Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS); + if (Res) + break; } // Reinitialize Bytes Bytes = Bytes_.slice(0, MaxInstBytesNum); @@ -461,7 +537,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPP8GFX1164, + DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear @@ -469,7 +546,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664, + MI, QW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) convertVOPCDPPInst(MI); @@ -530,9 +608,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW, + Address, CS); if (Res) break; + Res = tryDecodeInst(DecoderTableGFX1232, MI, DW, Address, CS); + if (Res) + break; + if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; @@ -560,7 +643,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1264, MI, QW, Address, CS); + if (Res) + break; + + Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW, + Address, CS); if (Res) break; @@ -640,6 +728,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = convertMIMGInst(MI); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))) + Res = convertMIMGInst(MI); + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)) Res = convertEXPInst(MI); @@ -679,7 +771,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { - if (STI.hasFeature(AMDGPU::FeatureGFX11)) { + if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) { // The MCInst still has these fields even though they are no longer encoded // in the GFX11 instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); @@ -690,9 +782,13 @@ DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 || MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 || MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 || - MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) { + MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 || + MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) { // The MCInst has this field that is not directly encoded in the // instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); @@ -840,6 +936,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { // VADDR size. Consequently, decoded instructions always show address as if it // has 1 dword, which could be not really so. DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + auto TSFlags = MCII->get(MI.getOpcode()).TSFlags; int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst); @@ -848,8 +945,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::vdata); int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); - int RsrcIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); + int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc + : AMDGPU::OpName::rsrc; + int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -870,7 +968,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } bool IsAtomic = (VDstIdx != -1); - bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; + bool IsGather4 = TSFlags & SIInstrFlags::Gather4; + bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE; bool IsNSA = false; bool IsPartialNSA = false; unsigned AddrSize = Info->VAddrDwords; @@ -887,10 +986,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); + // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms. + // VIMAGE insts other than BVH never use vaddr4. IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA || - Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA; + Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA || + Info->MIMGEncoding == AMDGPU::MIMGEncGfx12; if (!IsNSA) { - if (AddrSize > 12) + if (!IsVSample && AddrSize > 12) AddrSize = 16; } else { if (AddrSize > Info->VAddrDwords) { @@ -1132,6 +1234,13 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, return createRegOperand(SRegClassID, Val >> shift); } +MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx, + bool IsHi) const { + unsigned RCID = + IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID; + return createRegOperand(RCID, RegIdx); +} + // Decode Literals for insts which always have a literal in the encoding MCOperand AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { @@ -1147,7 +1256,7 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { return MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { +MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1157,9 +1266,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { Twine(Bytes.size())); } HasLiteral = true; - Literal = eatBytes<uint32_t>(Bytes); + Literal = Literal64 = eatBytes<uint32_t>(Bytes); + if (ExtendFP64) + Literal64 <<= 32; } - return MCOperand::createImm(Literal); + return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1376,7 +1487,7 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral, - unsigned ImmWidth) const { + unsigned ImmWidth, bool IsFP) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1388,6 +1499,20 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, return createRegOperand(IsAGPR ? getAgprClassId(Width) : getVgprClassId(Width), Val - VGPR_MIN); } + return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth, + IsFP); +} + +MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, + unsigned Val, + bool MandatoryLiteral, + unsigned ImmWidth, + bool IsFP) const { + // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been + // decoded earlier. + assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0"); + using namespace AMDGPU::EncValues; + if (Val <= SGPR_MAX) { // "SGPR_MIN <= Val" is always true and causes compilation warning. static_assert(SGPR_MIN == 0); @@ -1410,7 +1535,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, // Keep a sentinel value for deferred setting return MCOperand::createImm(LITERAL_CONST); else - return decodeLiteralConstant(); + return decodeLiteralConstant(IsFP && ImmWidth == 64); } switch (Width) { @@ -1616,11 +1741,18 @@ bool AMDGPUDisassembler::isGFX11Plus() const { return AMDGPU::isGFX11Plus(STI); } +bool AMDGPUDisassembler::isGFX12Plus() const { + return AMDGPU::isGFX12Plus(STI); +} bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); } +bool AMDGPUDisassembler::hasKernargPreload() const { + return AMDGPU::hasKernargPreload(STI); +} + //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// @@ -1717,16 +1849,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) return MCDisassembler::Fail; - PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + if (isGFX9Plus()) + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); - if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + if (!isGFX9Plus()) + if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0) + return MCDisassembler::Fail; + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1) return MCDisassembler::Fail; + if (!isGFX10Plus()) + if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2) + return MCDisassembler::Fail; if (isGFX10Plus()) { PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", - COMPUTE_PGM_RSRC1_WGP_MODE); - PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); - PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); } return MCDisassembler::Success; } @@ -1807,16 +1946,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( PRINT_PSEUDO_DIRECTIVE_COMMENT( "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); } - PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", - COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE); - PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); - PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END); - if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0) + + if (isGFX11Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", + COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END); + } else { + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0) + return MCDisassembler::Fail; + } + + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1) return MCDisassembler::Fail; - PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", - COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); + + if (isGFX11Plus()) { + PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", + COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START); + } else { + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2) + return MCDisassembler::Fail; + } } else if (FourByteBuffer) { return MCDisassembler::Fail; } @@ -1945,10 +2097,24 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( return MCDisassembler::Success; - case amdhsa::RESERVED2_OFFSET: - // 6 bytes from here are reserved, must be 0. - ReservedBytes = DE.getBytes(Cursor, 6); - for (int I = 0; I < 6; ++I) { + case amdhsa::KERNARG_PRELOAD_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) { + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length", + KERNARG_PRELOAD_SPEC_LENGTH); + } + + if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) { + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset", + KERNARG_PRELOAD_SPEC_OFFSET); + } + return MCDisassembler::Success; + + case amdhsa::RESERVED3_OFFSET: + // 4 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 4); + for (int I = 0; I < 4; ++I) { if (ReservedBytes[I] != 0) return MCDisassembler::Fail; } @@ -1975,7 +2141,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( if (isGFX10Plus()) { uint16_t KernelCodeProperties = support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET], - support::endianness::little); + llvm::endianness::little); EnableWavefrontSize32 = AMDHSA_BITS_GET(KernelCodeProperties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); |
