author    Dimitry Andric <dim@FreeBSD.org>  2021-06-13 19:31:46 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-06-13 19:37:19 +0000
commit    e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch)
tree      94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
parent    bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff)
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Merge llvm-project main llvmorg-12-init-17869-g8e464dd76bef
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-12-init-17869-g8e464dd76bef, the last commit before the upstream release/12.x branch was created.

PR:        255570
MFC after: 6 weeks
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 435
1 file changed, 390 insertions(+), 45 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9c2f2e7eecd1..8061c6c509e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -17,42 +17,24 @@
// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
#include "Disassembler/AMDGPUDisassembler.h"
-#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm-c/Disassembler.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
+#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <tuple>
-#include <vector>
using namespace llvm;
#define DEBUG_TYPE "amdgpu-disassembler"
-#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
- : AMDGPU::EncValues::SGPR_MAX_SI)
+#define SGPR_MAX \
+ (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
+ : AMDGPU::EncValues::SGPR_MAX_SI)
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
@@ -63,7 +45,7 @@ AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
+ if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
}
@@ -139,6 +121,8 @@ DECODE_OPERAND_REG(VS_128)
DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
+DECODE_OPERAND_REG(VReg_256)
+DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
@@ -382,15 +366,24 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
+ MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
+ insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
+ }
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
@@ -499,8 +492,16 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::d16);
assert(VDataIdx != -1);
- assert(DMaskIdx != -1);
- assert(TFEIdx != -1);
+ if (DMaskIdx == -1 || TFEIdx == -1) { // intersect_ray
+ if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
+ assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
+ MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
+ addOperand(MI, MCOperand::createImm(1));
+ }
+ return MCDisassembler::Success;
+ }
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
@@ -544,9 +545,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DstSize = (DstSize + 1) / 2;
}
- // FIXME: Add tfe support
if (MI.getOperand(TFEIdx).getImm())
- return MCDisassembler::Success;
+ DstSize += 1;
if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
return MCDisassembler::Success;
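With TFE set, the hardware writes one extra dword carrying the fetch status, so the expected destination size grows by one instead of the repacking being skipped as the old FIXME did. As a worked example: a D16 load of three components packs into (3 + 1) / 2 = 2 dwords, and an enabled TFE bit raises the expected DstSize to 3.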
@@ -996,10 +996,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
using namespace AMDGPU::EncValues;
- unsigned TTmpMin =
- (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
- unsigned TTmpMax =
- (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
+ unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
+ unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
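As a hypothetical round trip, assuming the SIDefines.h values TTMP_VI_MIN = 112 and TTMP_GFX9PLUS_MIN = 108: an encoded operand value of 110 decodes to ttmp2 on GFX9+ (110 - 108 = 2), while on VI it falls outside [112, 123] and getTTmpIdx returns -1.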
@@ -1017,7 +1015,8 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
: getVgprClassId(Width), Val - VGPR_MIN);
}
if (Val <= SGPR_MAX) {
- assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ static_assert(SGPR_MIN == 0, "");
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
}
@@ -1054,7 +1053,8 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
assert(Width == OPW256 || Width == OPW512);
if (Val <= SGPR_MAX) {
- assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ // "SGPR_MIN <= Val" is always true and causes compilation warning.
+ static_assert(SGPR_MIN == 0, "");
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
}
@@ -1137,8 +1137,8 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
Val - SDWA9EncValues::SRC_VGPR_MIN);
}
if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
- Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
- : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
+ Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
+ : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
return createSRegOperand(getSgprClassId(Width),
Val - SDWA9EncValues::SRC_SGPR_MIN);
}
@@ -1207,12 +1207,358 @@ bool AMDGPUDisassembler::isVI() const {
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
-bool AMDGPUDisassembler::isGFX9() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
+
+bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
+
+bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
+
+bool AMDGPUDisassembler::isGFX10Plus() const {
+ return AMDGPU::isGFX10Plus(STI);
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPU specific symbol handling
+//===----------------------------------------------------------------------===//
+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << DIRECTIVE " " \
+ << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ } while (0)
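The MASK##_SHIFT token pasting relies on each amdhsa mask having a companion _SHIFT constant, so a call such as PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) expands, roughly, to:

    do {
      KdStream << Indent << ".amdhsa_ieee_mode "
               << ((FourByteBuffer & COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) >>
                   COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE_SHIFT)
               << '\n';
    } while (0);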
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ StringRef Indent = "\t";
+
+ // We cannot accurately backward compute #VGPRs used from
+ // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
+ // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
+ // simply calculate the inverse of what the assembler does.
+
+ uint32_t GranulatedWorkitemVGPRCount =
+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
+ COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+
+ uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
+
+ KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
+
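A minimal numeric sketch of the round trip, assuming for illustration that the VGPR encoding granule is 4:

    uint32_t Granule = 4;  // stand-in for AMDGPU::IsaInfo::getVGPREncodingGranule(&STI)
    uint32_t Encoded = 5;  // GRANULATED_WORKITEM_VGPR_COUNT field value
    uint32_t NextFreeVGPR = (Encoded + 1) * Granule;  // 24, printed as the directive above
    uint32_t ReEncoded = NextFreeVGPR / Granule - 1;  // 5, the field round-trips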
+ // We cannot backward compute values used to calculate
+ // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
+ // directives can't be computed:
+ // .amdhsa_reserve_vcc
+ // .amdhsa_reserve_flat_scratch
+ // .amdhsa_reserve_xnack_mask
+ // They take their respective default values if not specified in the assembly.
+ //
+ // GRANULATED_WAVEFRONT_SGPR_COUNT
+ // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
+ //
+ // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
+ // are set to 0. So while disassembling we consider that:
+ //
+ // GRANULATED_WAVEFRONT_SGPR_COUNT
+ // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
+ //
+ // The disassembler cannot recover the original values of those 3 directives.
+
+ uint32_t GranulatedWavefrontSGPRCount =
+ (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
+ COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+
+ if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
+ return MCDisassembler::Fail;
+
+ uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
+ AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
+
+ KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
+ KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+ return MCDisassembler::Fail;
+
+ if (isGFX10Plus()) {
+ PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
+ COMPUTE_PGM_RSRC1_WGP_MODE);
+ PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ }
+ return MCDisassembler::Success;
}
-bool AMDGPUDisassembler::isGFX10() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ StringRef Indent = "\t";
+ PRINT_DIRECTIVE(
+ ".amdhsa_system_sgpr_private_segment_wavefront_offset",
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
+ return MCDisassembler::Fail;
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
+ return MCDisassembler::Fail;
+
+ PRINT_DIRECTIVE(
+ ".amdhsa_exception_fp_ieee_invalid_op",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_DIRECTIVE(
+ ".amdhsa_exception_fp_ieee_div_zero",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
+ COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+
+ if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
+#undef PRINT_DIRECTIVE
+
+MCDisassembler::DecodeStatus
+AMDGPUDisassembler::decodeKernelDescriptorDirective(
+ DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
+ raw_string_ostream &KdStream) const {
+#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << DIRECTIVE " " \
+ << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ } while (0)
+
+ uint16_t TwoByteBuffer = 0;
+ uint32_t FourByteBuffer = 0;
+ uint64_t EightByteBuffer = 0;
+
+ StringRef ReservedBytes;
+ StringRef Indent = "\t";
+
+ assert(Bytes.size() == 64);
+ DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
+
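For orientation, the offsets tested below correspond to the 64-byte Code Object V3 kernel descriptor; field names are from llvm/Support/AMDHSAKernelDescriptor.h and byte offsets are inferred from the sizes read in each case:

    // Offset  Size  Field
    //   0      4    GROUP_SEGMENT_FIXED_SIZE
    //   4      4    PRIVATE_SEGMENT_FIXED_SIZE
    //   8      8    RESERVED0
    //  16      8    KERNEL_CODE_ENTRY_BYTE_OFFSET
    //  24     20    RESERVED1
    //  44      4    COMPUTE_PGM_RSRC3
    //  48      4    COMPUTE_PGM_RSRC1
    //  52      4    COMPUTE_PGM_RSRC2
    //  56      2    KERNEL_CODE_PROPERTIES
    //  58      6    RESERVED2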
+ switch (Cursor.tell()) {
+ case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
+ << '\n';
+ return MCDisassembler::Success;
+
+ case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_private_segment_fixed_size "
+ << FourByteBuffer << '\n';
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED0_OFFSET:
+ // 8 reserved bytes, must be 0.
+ EightByteBuffer = DE.getU64(Cursor);
+ if (EightByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
+ // KERNEL_CODE_ENTRY_BYTE_OFFSET
+ // So far no directive controls this for Code Object V3, so simply skip for
+ // disassembly.
+ DE.skip(Cursor, 8);
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED1_OFFSET:
+ // 20 reserved bytes, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 20);
+ for (int I = 0; I < 20; ++I) {
+ if (ReservedBytes[I] != 0) {
+ return MCDisassembler::Fail;
+ }
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
+ // COMPUTE_PGM_RSRC3
+ // - Only set for GFX10; GFX6-9 require this to be 0.
+ // - Currently no directives directly control this.
+ FourByteBuffer = DE.getU32(Cursor);
+ if (!isGFX10Plus() && FourByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
+ MCDisassembler::Fail) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
+ MCDisassembler::Fail) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
+ using namespace amdhsa;
+ TwoByteBuffer = DE.getU16(Cursor);
+
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+
+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
+ return MCDisassembler::Fail;
+
+ // Reserved for GFX9
+ if (isGFX9() &&
+ (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
+ return MCDisassembler::Fail;
+ } else if (isGFX10Plus()) {
+ PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ }
+
+ if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+
+ case amdhsa::RESERVED2_OFFSET:
+ // 6 bytes from here are reserved, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 6);
+ for (int I = 0; I < 6; ++I) {
+ if (ReservedBytes[I] != 0)
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+
+ default:
+ llvm_unreachable("Unhandled index. Case statements cover everything.");
+ return MCDisassembler::Fail;
+ }
+#undef PRINT_DIRECTIVE
+}
+
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
+ StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
+ // CP microcode requires the kernel descriptor to be 64-byte aligned.
+ if (Bytes.size() != 64 || KdAddress % 64 != 0)
+ return MCDisassembler::Fail;
+
+ std::string Kd;
+ raw_string_ostream KdStream(Kd);
+ KdStream << ".amdhsa_kernel " << KdName << '\n';
+
+ DataExtractor::Cursor C(0);
+ while (C && C.tell() < Bytes.size()) {
+ MCDisassembler::DecodeStatus Status =
+ decodeKernelDescriptorDirective(C, Bytes, KdStream);
+
+ cantFail(C.takeError());
+
+ if (Status == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ }
+ KdStream << ".end_amdhsa_kernel\n";
+ outs() << KdStream.str();
+ return MCDisassembler::Success;
+}
+
+Optional<MCDisassembler::DecodeStatus>
+AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const {
+ // Right now only kernel descriptor needs to be handled.
+ // We ignore all other symbols for target-specific handling.
+ // TODO:
+ // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
+ // Object V2 and V3 when symbols are marked protected.
+
+ // amd_kernel_code_t for Code Object V2.
+ if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
+ Size = 256;
+ return MCDisassembler::Fail;
+ }
+
+ // Code Object V3 kernel descriptors.
+ StringRef Name = Symbol.Name;
+ if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
+ Size = 64; // Size = 64 regardless of success or failure.
+ return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
+ }
+ return None;
}
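A small sketch of the V3 naming convention assumed above: a kernel foo is accompanied by an STT_OBJECT symbol foo.kd of size 64, so dropping the suffix recovers the name passed to decodeKernelDescriptor:

    StringRef Name("foo.kd");               // hypothetical .kd symbol
    assert(Name.endswith(".kd"));
    StringRef KdName = Name.drop_back(3);   // "foo", printed as ".amdhsa_kernel foo"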
//===----------------------------------------------------------------------===//
@@ -1233,11 +1579,10 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
if (!Symbols)
return false;
- auto Result = std::find_if(Symbols->begin(), Symbols->end(),
- [Value](const SymbolInfoTy& Val) {
- return Val.Addr == static_cast<uint64_t>(Value)
- && Val.Type == ELF::STT_NOTYPE;
- });
+ auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
+ return Val.Addr == static_cast<uint64_t>(Value) &&
+ Val.Type == ELF::STT_NOTYPE;
+ });
if (Result != Symbols->end()) {
auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);