diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-06-13 19:31:46 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-06-13 19:37:19 +0000 |
commit | e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch) | |
tree | 94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | |
parent | bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff) | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
download | src-e8d8bef961a50d4dc22501cde4fb9fb0be1b2532.tar.gz src-e8d8bef961a50d4dc22501cde4fb9fb0be1b2532.zip |
Merge llvm-project main llvmorg-12-init-17869-g8e464dd76bef
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-12-init-17869-g8e464dd76bef, the last commit before the
upstream release/12.x branch was created.
PR: 255570
MFC after: 6 weeks
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 435 |
1 file changed, 390 insertions, 45 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c2f2e7eecd1..8061c6c509e0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -17,42 +17,24 @@ // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)? #include "Disassembler/AMDGPUDisassembler.h" -#include "AMDGPU.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIDefines.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm-c/Disassembler.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/ELF.h" +#include "llvm-c/DisassemblerTypes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdint> -#include <iterator> -#include <tuple> -#include <vector> using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" -#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \ - : AMDGPU::EncValues::SGPR_MAX_SI) +#define SGPR_MAX \ + (isGFX10Plus() ? 
AMDGPU::EncValues::SGPR_MAX_GFX10 \ + : AMDGPU::EncValues::SGPR_MAX_SI) using DecodeStatus = llvm::MCDisassembler::DecodeStatus; @@ -63,7 +45,7 @@ AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) { // ToDo: AMDGPUDisassembler supports only VI ISA. - if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10()) + if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus()) report_fatal_error("Disassembly not yet supported for subtarget"); } @@ -139,6 +121,8 @@ DECODE_OPERAND_REG(VS_128) DECODE_OPERAND_REG(VReg_64) DECODE_OPERAND_REG(VReg_96) DECODE_OPERAND_REG(VReg_128) +DECODE_OPERAND_REG(VReg_256) +DECODE_OPERAND_REG(VReg_512) DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) @@ -382,15 +366,24 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 || + MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || + MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || + MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) { // Insert dummy unused src2_modifiers. 
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) && + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) { + insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1); + } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); @@ -499,8 +492,16 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { AMDGPU::OpName::d16); assert(VDataIdx != -1); - assert(DMaskIdx != -1); - assert(TFEIdx != -1); + if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray + if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) { + assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa || + MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa || + MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa || + MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa); + addOperand(MI, MCOperand::createImm(1)); + } + return MCDisassembler::Success; + } const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); bool IsAtomic = (VDstIdx != -1); @@ -544,9 +545,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { DstSize = (DstSize + 1) / 2; } - // FIXME: Add tfe support if (MI.getOperand(TFEIdx).getImm()) - return MCDisassembler::Success; + DstSize += 1; if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) return MCDisassembler::Success; @@ -996,10 +996,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const { int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { using namespace AMDGPU::EncValues; - unsigned TTmpMin = - (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN; - unsigned TTmpMax = - (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX; + unsigned TTmpMin = isGFX9Plus() ? 
TTMP_GFX9PLUS_MIN : TTMP_VI_MIN; + unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX; return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1; } @@ -1017,7 +1015,8 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c : getVgprClassId(Width), Val - VGPR_MIN); } if (Val <= SGPR_MAX) { - assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. + // "SGPR_MIN <= Val" is always true and causes compilation warning. + static_assert(SGPR_MIN == 0, ""); return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); } @@ -1054,7 +1053,8 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c assert(Width == OPW256 || Width == OPW512); if (Val <= SGPR_MAX) { - assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. + // "SGPR_MIN <= Val" is always true and causes compilation warning. + static_assert(SGPR_MIN == 0, ""); return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); } @@ -1137,8 +1137,8 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, Val - SDWA9EncValues::SRC_VGPR_MIN); } if (SDWA9EncValues::SRC_SGPR_MIN <= Val && - Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10 - : SDWA9EncValues::SRC_SGPR_MAX_SI)) { + Val <= (isGFX10Plus() ? 
SDWA9EncValues::SRC_SGPR_MAX_GFX10 + : SDWA9EncValues::SRC_SGPR_MAX_SI)) { return createSRegOperand(getSgprClassId(Width), Val - SDWA9EncValues::SRC_SGPR_MIN); } @@ -1207,12 +1207,358 @@ bool AMDGPUDisassembler::isVI() const { return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; } -bool AMDGPUDisassembler::isGFX9() const { - return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); } + +bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); } + +bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); } + +bool AMDGPUDisassembler::isGFX10Plus() const { + return AMDGPU::isGFX10Plus(STI); +} + +//===----------------------------------------------------------------------===// +// AMDGPU specific symbol handling +//===----------------------------------------------------------------------===// +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + + // We cannot accurately backward compute #VGPRs used from + // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same + // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we + // simply calculate the inverse of what the assembler does. 
+ + uint32_t GranulatedWorkitemVGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + + uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; + + // We cannot backward compute values used to calculate + // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following + // directives can't be computed: + // .amdhsa_reserve_vcc + // .amdhsa_reserve_flat_scratch + // .amdhsa_reserve_xnack_mask + // They take their respective default values if not specified in the assembly. + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) + // + // We compute the inverse as though all directives apart from NEXT_FREE_SGPR + // are set to 0. So while disassembling we consider that: + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + 0 + 0 + 0) + // + // The disassembler cannot recover the original values of those 3 directives. 
+ + uint32_t GranulatedWavefrontSGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + + if (isGFX10Plus() && GranulatedWavefrontSGPRCount) + return MCDisassembler::Fail; + + uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * + AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + return MCDisassembler::Fail; + + if (isGFX10Plus()) { + PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", + COMPUTE_PGM_RSRC1_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); + 
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + } + return MCDisassembler::Success; } -bool AMDGPUDisassembler::isGFX10() const { - return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + PRINT_DIRECTIVE( + ".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_invalid_op", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", + 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + +#undef PRINT_DIRECTIVE + +MCDisassembler::DecodeStatus +AMDGPUDisassembler::decodeKernelDescriptorDirective( + DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes, + raw_string_ostream &KdStream) const { +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + + uint16_t TwoByteBuffer = 0; + uint32_t FourByteBuffer = 0; + uint64_t EightByteBuffer = 0; + + StringRef ReservedBytes; + StringRef Indent = "\t"; + + assert(Bytes.size() == 64); + DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); + + switch (Cursor.tell()) { + case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer + << '\n'; + return MCDisassembler::Success; + + case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_private_segment_fixed_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + + case amdhsa::RESERVED0_OFFSET: + // 8 reserved bytes, must be 0. + EightByteBuffer = DE.getU64(Cursor); + if (EightByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: + // KERNEL_CODE_ENTRY_BYTE_OFFSET + // So far no directive controls this for Code Object V3, so simply skip for + // disassembly. 
+ DE.skip(Cursor, 8); + return MCDisassembler::Success; + + case amdhsa::RESERVED1_OFFSET: + // 20 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 20); + for (int I = 0; I < 20; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: + // COMPUTE_PGM_RSRC3 + // - Only set for GFX10, GFX6-9 have this to be 0. + // - Currently no directives directly control this. + FourByteBuffer = DE.getU32(Cursor); + if (!isGFX10Plus() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + + if (TwoByteBuffer 
& KERNEL_CODE_PROPERTY_RESERVED0) + return MCDisassembler::Fail; + + // Reserved for GFX9 + if (isGFX9() && + (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + return MCDisassembler::Fail; + } else if (isGFX10Plus()) { + PRINT_DIRECTIVE(".amdhsa_wavefront_size32", + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + return MCDisassembler::Fail; + + return MCDisassembler::Success; + + case amdhsa::RESERVED2_OFFSET: + // 6 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 6); + for (int I = 0; I < 6; ++I) { + if (ReservedBytes[I] != 0) + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + default: + llvm_unreachable("Unhandled index. Case statements cover everything."); + return MCDisassembler::Fail; + } +#undef PRINT_DIRECTIVE +} + +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( + StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64 aligned. + if (Bytes.size() != 64 || KdAddress % 64 != 0) + return MCDisassembler::Fail; + + std::string Kd; + raw_string_ostream KdStream(Kd); + KdStream << ".amdhsa_kernel " << KdName << '\n'; + + DataExtractor::Cursor C(0); + while (C && C.tell() < Bytes.size()) { + MCDisassembler::DecodeStatus Status = + decodeKernelDescriptorDirective(C, Bytes, KdStream); + + cantFail(C.takeError()); + + if (Status == MCDisassembler::Fail) + return MCDisassembler::Fail; + } + KdStream << ".end_amdhsa_kernel\n"; + outs() << KdStream.str(); + return MCDisassembler::Success; +} + +Optional<MCDisassembler::DecodeStatus> +AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const { + // Right now only kernel descriptor needs to be handled. + // We ignore all other symbols for target specific handling. 
+ // TODO: + // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code + // Object V2 and V3 when symbols are marked protected. + + // amd_kernel_code_t for Code Object V2. + if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { + Size = 256; + return MCDisassembler::Fail; + } + + // Code Object V3 kernel descriptors. + StringRef Name = Symbol.Name; + if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + Size = 64; // Size = 64 regardless of success or failure. + return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); + } + return None; } //===----------------------------------------------------------------------===// @@ -1233,11 +1579,10 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, if (!Symbols) return false; - auto Result = std::find_if(Symbols->begin(), Symbols->end(), - [Value](const SymbolInfoTy& Val) { - return Val.Addr == static_cast<uint64_t>(Value) - && Val.Type == ELF::STT_NOTYPE; - }); + auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) { + return Val.Addr == static_cast<uint64_t>(Value) && + Val.Type == ELF::STT_NOTYPE; + }); if (Result != Symbols->end()) { auto *Sym = Ctx.getOrCreateSymbol(Result->Name); const auto *Add = MCSymbolRefExpr::create(Sym, Ctx); |