diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-07 23:04:38 +0000 |
commit | 0e1e0ce556810ad5f9d45485e686f0653530516c (patch) | |
tree | ab02ce7c4fafc0518430e9cec77d41201bce23f0 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | |
parent | c3eb0b7c19221f3a2133ab14d3ffffa61ec0c4bc (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 875 |
1 files changed, 332 insertions, 543 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index c4e85210848a..1b05acd5c90a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -45,13 +45,11 @@ using namespace llvm; using DecodeStatus = llvm::MCDisassembler::DecodeStatus; AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, - MCContext &Ctx, - MCInstrInfo const *MCII) : - MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), - TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) { - + MCContext &Ctx, MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), + MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) { // ToDo: AMDGPUDisassembler supports only VI ISA. - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus()) + if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus()) report_fatal_error("Disassembly not yet supported for subtarget"); } @@ -74,7 +72,7 @@ static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, return OpIdx; } -static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, +static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder) { auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); @@ -115,183 +113,160 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, return addOperand(Inst, DAsm->DecoderName(Imm)); \ } -#define DECODE_OPERAND_REG(RegClass) \ -DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) - -DECODE_OPERAND_REG(VGPR_32) -DECODE_OPERAND_REG(VGPR_32_Lo128) -DECODE_OPERAND_REG(VRegOrLds_32) -DECODE_OPERAND_REG(VS_32) -DECODE_OPERAND_REG(VS_64) -DECODE_OPERAND_REG(VS_128) - 
-DECODE_OPERAND_REG(VReg_64) -DECODE_OPERAND_REG(VReg_96) -DECODE_OPERAND_REG(VReg_128) -DECODE_OPERAND_REG(VReg_256) -DECODE_OPERAND_REG(VReg_288) -DECODE_OPERAND_REG(VReg_352) -DECODE_OPERAND_REG(VReg_384) -DECODE_OPERAND_REG(VReg_512) -DECODE_OPERAND_REG(VReg_1024) - -DECODE_OPERAND_REG(SReg_32) -DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) -DECODE_OPERAND_REG(SReg_32_XEXEC_HI) -DECODE_OPERAND_REG(SRegOrLds_32) -DECODE_OPERAND_REG(SReg_64) -DECODE_OPERAND_REG(SReg_64_XEXEC) -DECODE_OPERAND_REG(SReg_128) -DECODE_OPERAND_REG(SReg_256) -DECODE_OPERAND_REG(SReg_512) - -DECODE_OPERAND_REG(AGPR_32) -DECODE_OPERAND_REG(AReg_64) -DECODE_OPERAND_REG(AReg_128) -DECODE_OPERAND_REG(AReg_256) -DECODE_OPERAND_REG(AReg_512) -DECODE_OPERAND_REG(AReg_1024) -DECODE_OPERAND_REG(AV_32) -DECODE_OPERAND_REG(AV_64) -DECODE_OPERAND_REG(AV_128) -DECODE_OPERAND_REG(AVDst_128) -DECODE_OPERAND_REG(AVDst_512) - -static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); -} - -static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); -} - -static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm)); -} - -static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm)); -} - -static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const 
MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm)); -} - -static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512)); -} - -static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512)); -} - -static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512)); -} - -static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512)); -} - -static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512)); -} - -static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm)); -} - -static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - 
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm)); -} - -static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm)); -} - -static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm)); -} +// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is +// number of register. Used by VGPR only and AGPR only operands. +#define DECODE_OPERAND_REG_8(RegClass) \ + static DecodeStatus Decode##RegClass##RegisterClass( \ + MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \ + const MCDisassembler *Decoder) { \ + assert(Imm < (1 << 8) && "8-bit encoding"); \ + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \ + return addOperand( \ + Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \ + } -static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); -} +#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \ + ImmWidth) \ + static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \ + const MCDisassembler *Decoder) { \ + assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \ + auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \ + return addOperand(Inst, \ + DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \ + MandatoryLiteral, ImmWidth)); \ + } -static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler 
*Decoder) { - const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); - return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); -} +// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to +// get register class. Used by SGPR only operands. +#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \ + DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0) + +// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register, +// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC). +// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp. +// Used by AV_ register classes (AGPR or VGPR only register operands). +#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \ + DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \ + Imm | AMDGPU::EncValues::IS_VGPR, false, 0) + +// Decoder for Src(9-bit encoding) registers only. +#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0) + +// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set +// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers +// only. +#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0) + +// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding +// Imm{9} is acc, registers only. +#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0) + +// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be +// register from RegClass or immediate. Registers that don't belong to RegClass +// will be decoded and InstPrinter will report warning. Immediate will be +// decoded into constant of size ImmWidth, should match width of immediate used +// by OperandType (important for floating point types). 
+#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \ + false, ImmWidth) + +// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc) +// and decode using 'enum10' from decodeSrcOp. +#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \ + Imm | 512, false, ImmWidth) + +#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \ + DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \ + OpWidth, Imm, true, ImmWidth) + +// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass' +// when RegisterClass is used as an operand. Most often used for destination +// operands. -static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { +DECODE_OPERAND_REG_8(VGPR_32) +DECODE_OPERAND_REG_8(VGPR_32_Lo128) +DECODE_OPERAND_REG_8(VReg_64) +DECODE_OPERAND_REG_8(VReg_96) +DECODE_OPERAND_REG_8(VReg_128) +DECODE_OPERAND_REG_8(VReg_256) +DECODE_OPERAND_REG_8(VReg_288) +DECODE_OPERAND_REG_8(VReg_352) +DECODE_OPERAND_REG_8(VReg_384) +DECODE_OPERAND_REG_8(VReg_512) +DECODE_OPERAND_REG_8(VReg_1024) + +DECODE_OPERAND_REG_7(SReg_32, OPW32) +DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32) +DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32) +DECODE_OPERAND_REG_7(SReg_64, OPW64) +DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64) +DECODE_OPERAND_REG_7(SReg_128, OPW128) +DECODE_OPERAND_REG_7(SReg_256, OPW256) +DECODE_OPERAND_REG_7(SReg_512, OPW512) + +DECODE_OPERAND_REG_8(AGPR_32) +DECODE_OPERAND_REG_8(AReg_64) +DECODE_OPERAND_REG_8(AReg_128) +DECODE_OPERAND_REG_8(AReg_256) +DECODE_OPERAND_REG_8(AReg_512) +DECODE_OPERAND_REG_8(AReg_1024) + +DECODE_OPERAND_REG_AV10(AVDst_128, OPW128) +DECODE_OPERAND_REG_AV10(AVDst_512, OPW512) + +// Decoders for register only source RegisterOperands that use
9-bit Src +// encoding: 'decodeOperand_<RegClass>'. + +DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32) +DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64) +DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128) +DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256) +DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32) + +DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32) + +DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32) +DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64) +DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128) + +// Decoders for register or immediate RegisterOperands that use 9-bit Src +// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'. + +DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32) + +DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64) +DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32) + +DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16) +DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16) +DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32) + +static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); return addOperand(Inst, 
DAsm->decodeMandatoryLiteralConstant(Imm)); } -static DecodeStatus -decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); - return addOperand( - Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true)); -} - -static DecodeStatus -decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr, - const MCDisassembler *Decoder) { - const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); - return addOperand( - Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true)); -} - static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder) { const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); @@ -381,13 +356,6 @@ DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr, Decoder); } -static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm, - uint64_t Addr, - const MCDisassembler *Decoder) { - auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); - return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm)); -} - #define DECODE_SDWA(DecName) \ DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) @@ -436,7 +404,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes_, uint64_t Address, raw_ostream &CS) const { - CommentStream = &CS; bool IsSDWA = false; unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size()); @@ -451,13 +418,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // encodings if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); - Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, - Address); + Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear - Res = 
tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, - Address); + Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) convertVOP3PDPPInst(MI); @@ -469,7 +434,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } break; } - Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address); + Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS); if (Res) break; } @@ -479,8 +444,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Bytes.size() >= 8) { const uint64_t QW = eatBytes<uint64_t>(Bytes); - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) { - Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address); + if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) { + Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS); if (Res) { if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) == -1) @@ -491,37 +456,37 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address); + Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address); + Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address); + Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address); + Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) convertVOPCDPPInst(MI); break; } - Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); + Res = 
tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS); if (Res) { IsSDWA = true; break; } - Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); + Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS); if (Res) { IsSDWA = true; break; } - Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address); + Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS); if (Res) { IsSDWA = true; break; } - if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) { - Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address); + if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) { + Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS); if (Res) break; } @@ -529,8 +494,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and // v_mad_mixhi_f16 for FMA variants. Try to decode using this special // table first so we print the correct name. - if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) { - Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address); + if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) { + Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS); if (Res) break; } @@ -542,64 +507,64 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Try decode 32-bit instruction if (Bytes.size() < 4) break; const uint32_t DW = eatBytes<uint32_t>(Bytes); - Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address); + Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address); + Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address); + Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS); if (Res) break; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { - Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address); + if 
(STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) { + Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS); if (Res) break; } - if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) { - Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address); + if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) { + Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS); if (Res) break; } - Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address); + Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address); + Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS); if (Res) break; if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) { - Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address); + if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) { + Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS); if (Res) break; } - if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { - Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address); + if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) { + Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS); if (Res) break; } - Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address); + Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address); + Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address); + Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address); + Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address); + Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS); if (Res) break; 
- Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address); + Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS); } while (false); if (Res && AMDGPU::isMAC(MI.getOpcode())) { @@ -627,7 +592,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res && (MCII->get(MI.getOpcode()).TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) && - (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) { + (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) { // GFX90A lost TFE, its place is occupied by ACC. int TFEOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe); @@ -714,7 +679,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const { - if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) { + if (STI.hasFeature(AMDGPU::FeatureGFX11)) { // The MCInst still has these fields even though they are no longer encoded // in the GFX11 instruction. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); @@ -736,12 +701,12 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { } DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { - if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || - STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { + if (STI.hasFeature(AMDGPU::FeatureGFX9) || + STI.hasFeature(AMDGPU::FeatureGFX10)) { if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst)) // VOPC - insert clamp insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); - } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) { int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); if (SDst != -1) { // VOPC - insert VCC register as sdst @@ -883,6 +848,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::vdata); int VAddr0Idx = 
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); + int RsrcIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -898,14 +865,14 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { assert(VDataIdx != -1); if (BaseOpcode->BVH) { // Add A16 operand for intersect_ray instructions - if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16)) - addOperand(MI, MCOperand::createImm(1)); + addOperand(MI, MCOperand::createImm(BaseOpcode->A16)); return MCDisassembler::Success; } bool IsAtomic = (VDstIdx != -1); bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; bool IsNSA = false; + bool IsPartialNSA = false; unsigned AddrSize = Info->VAddrDwords; if (isGFX10Plus()) { @@ -927,9 +894,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AddrSize = 16; } else { if (AddrSize > Info->VAddrDwords) { - // The NSA encoding does not contain enough operands for the combination - // of base opcode / dimension. Should this be an error? - return MCDisassembler::Success; + if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) { + // The NSA encoding does not contain enough operands for the + // combination of base opcode / dimension. Should this be an error? + return MCDisassembler::Success; + } + IsPartialNSA = true; } } } @@ -972,17 +942,20 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } } - // If not using NSA on GFX10+, widen address register to correct size. - unsigned NewVAddr0 = AMDGPU::NoRegister; - if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) { - unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); - unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); - VAddr0 = (VAddrSub0 != 0) ? 
VAddrSub0 : VAddr0; - - auto AddrRCID = MCII->get(NewOpcode).operands()[VAddr0Idx].RegClass; - NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0, + // If not using NSA on GFX10+, widen vaddr0 address register to correct size. + // If using partial NSA on GFX11+ widen last address register. + int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx; + unsigned NewVAddrSA = AMDGPU::NoRegister; + if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) && + AddrSize != Info->VAddrDwords) { + unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg(); + unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0); + VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA; + + auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass; + NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &MRI.getRegClass(AddrRCID)); - if (NewVAddr0 == AMDGPU::NoRegister) + if (!NewVAddrSA) return MCDisassembler::Success; } @@ -997,8 +970,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { } } - if (NewVAddr0 != AMDGPU::NoRegister) { - MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0); + if (NewVAddrSA) { + MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA); } else if (IsNSA) { assert(AddrSize <= Info->VAddrDwords); MI.erase(MI.begin() + VAddr0Idx + AddrSize, @@ -1159,214 +1132,6 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, return createRegOperand(SRegClassID, Val >> shift); } -MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const { - return decodeSrcOp(OPW32, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const { - return decodeSrcOp(OPW64, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const { - return decodeSrcOp(OPW128, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const { - return decodeSrcOp(OPW16, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const { 
- return decodeSrcOp(OPWV216, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const { - return decodeSrcOp(OPWV232, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_Lo128(unsigned Val) const { - return createRegOperand(AMDGPU::VGPR_32_Lo128RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { - // Some instructions have operand restrictions beyond what the encoding - // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra - // high bit. - Val &= 255; - - return createRegOperand(AMDGPU::VGPR_32RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const { - return decodeSrcOp(OPW32, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const { - return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255); -} - - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const { 
- return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const { - return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const { - return decodeSrcOp(OPW32, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const { - return decodeSrcOp(OPW64, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const { - return decodeSrcOp(OPW128, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const { - using namespace AMDGPU::EncValues; - assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. - return decodeSrcOp(OPW128, Val | IS_VGPR); -} - -MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const { - using namespace AMDGPU::EncValues; - assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. - return decodeSrcOp(OPW512, Val | IS_VGPR); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_64RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_96RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_128RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_256RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_288RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_320RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const { - return 
createRegOperand(AMDGPU::VReg_352RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_384RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_512RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const { - return createRegOperand(AMDGPU::VReg_1024RegClassID, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { - // table-gen generated disassembler doesn't care about operand types - // leaving only registry class so SSrc_32 operand turns into SReg_32 - // and therefore we accept immediates and literals here as well - return decodeSrcOp(OPW32, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC( - unsigned Val) const { - // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI - return decodeOperand_SReg_32(Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI( - unsigned Val) const { - // SReg_32_XM0 is SReg_32 without EXEC_HI - return decodeOperand_SReg_32(Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const { - // table-gen generated disassembler doesn't care about operand types - // leaving only registry class so SSrc_32 operand turns into SReg_32 - // and therefore we accept immediates and literals here as well - return decodeSrcOp(OPW32, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { - return decodeSrcOp(OPW64, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const { - return decodeSrcOp(OPW64, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const { - return decodeSrcOp(OPW128, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const { - return decodeDstOp(OPW256, Val); -} - -MCOperand 
AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const { - return decodeDstOp(OPW288, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const { - return decodeDstOp(OPW320, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const { - return decodeDstOp(OPW352, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const { - return decodeDstOp(OPW384, Val); -} - -MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const { - return decodeDstOp(OPW512, Val); -} - // Decode Literals for insts which always have a literal in the encoding MCOperand AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { @@ -1410,21 +1175,21 @@ MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { static int64_t getInlineImmVal32(unsigned Imm) { switch (Imm) { case 240: - return FloatToBits(0.5f); + return llvm::bit_cast<uint32_t>(0.5f); case 241: - return FloatToBits(-0.5f); + return llvm::bit_cast<uint32_t>(-0.5f); case 242: - return FloatToBits(1.0f); + return llvm::bit_cast<uint32_t>(1.0f); case 243: - return FloatToBits(-1.0f); + return llvm::bit_cast<uint32_t>(-1.0f); case 244: - return FloatToBits(2.0f); + return llvm::bit_cast<uint32_t>(2.0f); case 245: - return FloatToBits(-2.0f); + return llvm::bit_cast<uint32_t>(-2.0f); case 246: - return FloatToBits(4.0f); + return llvm::bit_cast<uint32_t>(4.0f); case 247: - return FloatToBits(-4.0f); + return llvm::bit_cast<uint32_t>(-4.0f); case 248: // 1 / (2 * PI) return 0x3e22f983; default: @@ -1435,21 +1200,21 @@ static int64_t getInlineImmVal32(unsigned Imm) { static int64_t getInlineImmVal64(unsigned Imm) { switch (Imm) { case 240: - return DoubleToBits(0.5); + return llvm::bit_cast<uint64_t>(0.5); case 241: - return DoubleToBits(-0.5); + return llvm::bit_cast<uint64_t>(-0.5); case 242: - return DoubleToBits(1.0); + return llvm::bit_cast<uint64_t>(1.0); case 243: - return DoubleToBits(-1.0); + return 
llvm::bit_cast<uint64_t>(-1.0); case 244: - return DoubleToBits(2.0); + return llvm::bit_cast<uint64_t>(2.0); case 245: - return DoubleToBits(-2.0); + return llvm::bit_cast<uint64_t>(-2.0); case 246: - return DoubleToBits(4.0); + return llvm::bit_cast<uint64_t>(4.0); case 247: - return DoubleToBits(-4.0); + return llvm::bit_cast<uint64_t>(-4.0); case 248: // 1 / (2 * PI) return 0x3fc45f306dc9c882; default: @@ -1482,23 +1247,21 @@ static int64_t getInlineImmVal16(unsigned Imm) { } } -MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) { +MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) { assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX); // ToDo: case 248: 1/(2*PI) - is allowed only on VI - switch (Width) { - case OPW32: - case OPW128: // splat constants - case OPW512: - case OPW1024: - case OPWV232: + // ImmWidth 0 is a default case where operand should not allow immediates. + // Imm value is still decoded into 32 bit immediate operand, inst printer will + // use it to print verbose error message. 
+ switch (ImmWidth) { + case 0: + case 32: return MCOperand::createImm(getInlineImmVal32(Imm)); - case OPW64: - case OPW256: + case 64: return MCOperand::createImm(getInlineImmVal64(Imm)); - case OPW16: - case OPWV216: + case 16: return MCOperand::createImm(getInlineImmVal16(Imm)); default: llvm_unreachable("implement me"); @@ -1612,7 +1375,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, - bool MandatoryLiteral) const { + bool MandatoryLiteral, + unsigned ImmWidth) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1639,7 +1403,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, return decodeIntImmed(Val); if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(Width, Val); + return decodeFPImmed(ImmWidth, Val); if (Val == LITERAL_CONST) { if (MandatoryLiteral) @@ -1662,26 +1426,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, } } -MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const { - using namespace AMDGPU::EncValues; - - assert(Val < 128); - assert(Width == OPW256 || Width == OPW512); - - if (Val <= SGPR_MAX) { - // "SGPR_MIN <= Val" is always true and causes compilation warning. - static_assert(SGPR_MIN == 0); - return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); - } - - int TTmpIdx = getTTmpIdx(Val); - if (TTmpIdx >= 0) { - return createSRegOperand(getTtmpClassId(Width), TTmpIdx); - } - - llvm_unreachable("unknown dst register"); -} - // Bit 0 of DstY isn't stored in the instruction, because it's always the // opposite of bit 0 of DstX. 
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst, @@ -1764,12 +1508,13 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, - const unsigned Val) const { + const unsigned Val, + unsigned ImmWidth) const { using namespace AMDGPU::SDWA; using namespace AMDGPU::EncValues; - if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || - STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { + if (STI.hasFeature(AMDGPU::FeatureGFX9) || + STI.hasFeature(AMDGPU::FeatureGFX10)) { // XXX: cast to int is needed to avoid stupid warning: // compare with unsigned is always true if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) && @@ -1795,31 +1540,31 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, return decodeIntImmed(SVal); if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(Width, SVal); + return decodeFPImmed(ImmWidth, SVal); return decodeSpecialReg32(SVal); - } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { + } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) { return createRegOperand(getVgprClassId(Width), Val); } llvm_unreachable("unsupported target"); } MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { - return decodeSDWASrc(OPW16, Val); + return decodeSDWASrc(OPW16, Val, 16); } MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { - return decodeSDWASrc(OPW32, Val); + return decodeSDWASrc(OPW32, Val, 32); } MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { using namespace AMDGPU::SDWA; - assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] || - STI.getFeatureBits()[AMDGPU::FeatureGFX10]) && + assert((STI.hasFeature(AMDGPU::FeatureGFX9) || + STI.hasFeature(AMDGPU::FeatureGFX10)) && "SDWAVopcDst should be present only on GFX9+"); - bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]; + bool IsWave64 = 
STI.hasFeature(AMDGPU::FeatureWavefrontSize64); if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; @@ -1840,18 +1585,19 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { - return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? - decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val); + return STI.hasFeature(AMDGPU::FeatureWavefrontSize64) + ? decodeSrcOp(OPW64, Val) + : decodeSrcOp(OPW32, Val); } bool AMDGPUDisassembler::isVI() const { - return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; + return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); } bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); } bool AMDGPUDisassembler::isGFX90A() const { - return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]; + return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); } bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); } @@ -1863,7 +1609,7 @@ bool AMDGPUDisassembler::isGFX10Plus() const { } bool AMDGPUDisassembler::isGFX11() const { - return STI.getFeatureBits()[AMDGPU::FeatureGFX11]; + return STI.hasFeature(AMDGPU::FeatureGFX11); } bool AMDGPUDisassembler::isGFX11Plus() const { @@ -1872,16 +1618,21 @@ bool AMDGPUDisassembler::isGFX11Plus() const { bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { - return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; + return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); } //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// +#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK)) #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + KdStream << Indent << DIRECTIVE " " << 
GET_FIELD(MASK) << '\n'; \ + } while (0) +#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \ + << GET_FIELD(MASK) << '\n'; \ } while (0) // NOLINTNEXTLINE(readability-identifier-naming) @@ -1896,11 +1647,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( // simply calculate the inverse of what the assembler does. uint32_t GranulatedWorkitemVGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT); - uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * - AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + uint32_t NextFreeVGPR = + (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32); KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; @@ -1924,8 +1675,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( // The disassembler cannot recover the original values of those 3 directives. 
uint32_t GranulatedWavefrontSGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT); if (isGFX10Plus() && GranulatedWavefrontSGPRCount) return MCDisassembler::Fail; @@ -2035,7 +1785,46 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( return MCDisassembler::Success; } +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + if (isGFX90A()) { + KdStream << Indent << ".amdhsa_accum_offset " + << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4 + << '\n'; + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0) + return MCDisassembler::Fail; + PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1) + return MCDisassembler::Fail; + } else if (isGFX10Plus()) { + if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { + PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", + COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + } else { + PRINT_PSEUDO_DIRECTIVE_COMMENT( + "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + } + PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", + COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", + COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START); + PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", + COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0) + return MCDisassembler::Fail; + PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP", + COMPUTE_PGM_RSRC3_GFX10_PLUS_IMAGE_OP); + } else if (FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} +#undef
PRINT_PSEUDO_DIRECTIVE_COMMENT #undef PRINT_DIRECTIVE +#undef GET_FIELD MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptorDirective( @@ -2103,30 +1892,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( return MCDisassembler::Success; case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: - // COMPUTE_PGM_RSRC3 - // - Only set for GFX10, GFX6-9 have this to be 0. - // - Currently no directives directly control this. FourByteBuffer = DE.getU32(Cursor); - if (!isGFX10Plus() && FourByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream); case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; + return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream); case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: using namespace amdhsa; @@ -2161,7 +1936,7 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5) PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); @@ -2192,6 +1967,20 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( if (Bytes.size() != 64 || KdAddress % 64 != 0) return MCDisassembler::Fail; + // FIXME: We can't actually decode "in order" as is done below, as e.g. 
GFX10 + // requires us to know the setting of .amdhsa_wavefront_size32 in order to + // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong + // order. Workaround this by first looking up .amdhsa_wavefront_size32 here + // when required. + if (isGFX10Plus()) { + uint16_t KernelCodeProperties = + support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET], + support::endianness::little); + EnableWavefrontSize32 = + AMDHSA_BITS_GET(KernelCodeProperties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + std::string Kd; raw_string_ostream KdStream(Kd); KdStream << ".amdhsa_kernel " << KdName << '\n'; |