diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 311 |
1 files changed, 284 insertions, 27 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 8061c6c509e0..fe62b8590fa0 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -123,6 +123,7 @@ DECODE_OPERAND_REG(VReg_96) DECODE_OPERAND_REG(VReg_128) DECODE_OPERAND_REG(VReg_256) DECODE_OPERAND_REG(VReg_512) +DECODE_OPERAND_REG(VReg_1024) DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) @@ -135,7 +136,9 @@ DECODE_OPERAND_REG(SReg_256) DECODE_OPERAND_REG(SReg_512) DECODE_OPERAND_REG(AGPR_32) +DECODE_OPERAND_REG(AReg_64) DECODE_OPERAND_REG(AReg_128) +DECODE_OPERAND_REG(AReg_256) DECODE_OPERAND_REG(AReg_512) DECODE_OPERAND_REG(AReg_1024) DECODE_OPERAND_REG(AV_32) @@ -157,6 +160,14 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } +static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm)); +} + static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -173,6 +184,14 @@ static DecodeStatus decodeOperand_VS_32(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm)); } +static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512)); +} + static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -181,6 +200,14 @@ static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512)); } +static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512)); +} + static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -197,6 +224,127 @@ static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512)); } +static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm)); +} + +static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm)); +} + +static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm)); +} + +static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm)); +} + +static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); +} + +static bool IsAGPROperand(const MCInst &Inst, int OpIdx, + const MCRegisterInfo *MRI) { + if (OpIdx < 0) + return false; + + const MCOperand &Op = Inst.getOperand(OpIdx); + if (!Op.isReg()) + return false; + + unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + auto Reg = Sub ? Sub : Op.getReg(); + return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; +} + +static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, + unsigned Imm, + AMDGPUDisassembler::OpWidthTy Opw, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + if (!DAsm->isGFX90A()) { + Imm &= 511; + } else { + // If atomic has both vdata and vdst their register classes are tied. + // The bit is decoded along with the vdst, first operand. We need to + // change register class to AGPR if vdst was AGPR. + // If a DS instruction has both data0 and data1 their register classes + // are also tied. + unsigned Opc = Inst.getOpcode(); + uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags; + uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 + : AMDGPU::OpName::vdata; + const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo(); + int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx); + if ((int)Inst.getNumOperands() == DataIdx) { + int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); + if (IsAGPROperand(Inst, DstIdx, MRI)) + Imm |= 512; + } + + if (TSFlags & SIInstrFlags::DS) { + int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); + if ((int)Inst.getNumOperands() == Data2Idx && + IsAGPROperand(Inst, DataIdx, MRI)) + Imm |= 512; + } + } + return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); +} + +static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return decodeOperand_AVLdSt_Any(Inst, Imm, + AMDGPUDisassembler::OPW32, Decoder); +} + +static DecodeStatus DecodeAVLdSt_64RegisterClass(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return decodeOperand_AVLdSt_Any(Inst, Imm, + AMDGPUDisassembler::OPW64, Decoder); +} + +static DecodeStatus DecodeAVLdSt_96RegisterClass(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return decodeOperand_AVLdSt_Any(Inst, Imm, + AMDGPUDisassembler::OPW96, Decoder); +} + +static DecodeStatus DecodeAVLdSt_128RegisterClass(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return decodeOperand_AVLdSt_Any(Inst, Imm, + AMDGPUDisassembler::OPW128, Decoder); +} + static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -250,6 +398,9 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table, return MCDisassembler::Fail; } +// The disassembler is greedy, so we need to check FI operand value to +// not parse a dpp if the correct literal is not set. For dpp16 the +// autogenerated decoder checks the dpp literal static bool isValidDPP8(const MCInst &MI) { using namespace llvm::AMDGPU::DPP; int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi); @@ -341,6 +492,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address); if (Res) break; + if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { + Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address); + if (Res) + break; + } + if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) { Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address); if (Res) break; @@ -351,6 +508,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; + + if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { + Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address); + if (Res) + break; + } + Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address); if (Res) break; @@ -369,6 +533,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || + MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || @@ -379,9 +544,44 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) && - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) { - insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1); + (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) { + int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolPos != -1) { + unsigned CPol = + (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ? + AMDGPU::CPol::GLC : 0; + if (MI.getNumOperands() <= (unsigned)CPolPos) { + insertNamedMCOperand(MI, MCOperand::createImm(CPol), + AMDGPU::OpName::cpol); + } else if (CPol) { + MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol); + } + } + } + + if (Res && (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) && + (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) { + // GFX90A lost TFE, its place is occupied by ACC. + int TFEOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe); + if (TFEOpIdx != -1) { + auto TFEIter = MI.begin(); + std::advance(TFEIter, TFEOpIdx); + MI.insert(TFEIter, MCOperand::createImm(0)); + } + } + + if (Res && (MCII->get(MI.getOpcode()).TSFlags & + (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) { + int SWZOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz); + if (SWZOpIdx != -1) { + auto SWZIter = MI.begin(); + std::advance(SWZIter, SWZOpIdx); + MI.insert(SWZIter, MCOperand::createImm(0)); + } } if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { @@ -453,6 +653,8 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { return MCDisassembler::Success; } +// We must check FI == literal to reject not genuine dpp8 insts, and we must +// first add optional MI operands to check FI DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); @@ -513,21 +715,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { unsigned DimIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); + int A16Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16); const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); const AMDGPU::MIMGDimInfo *Dim = AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); + const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm()); + + AddrSize = + AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); - AddrSize = BaseOpcode->NumExtraArgs + - (BaseOpcode->Gradients ? Dim->NumGradients : 0) + - (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + - (BaseOpcode->LodOrClampOrMip ? 1 : 0); IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; if (!IsNSA) { if (AddrSize > 8) AddrSize = 16; - else if (AddrSize > 4) - AddrSize = 8; } else { if (AddrSize > Info->VAddrDwords) { // The NSA encoding does not contain enough operands for the combination @@ -545,7 +747,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { DstSize = (DstSize + 1) / 2; } - if (MI.getOperand(TFEIdx).getImm()) + if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm()) DstSize += 1; if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) @@ -701,6 +903,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const { return decodeSrcOp(OPWV216, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const { + return decodeSrcOp(OPWV232, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { // Some instructions have operand restrictions beyond what the encoding // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra @@ -718,10 +924,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const { return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255); } +MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const { + return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255); +} + MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const { return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255); } +MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const { + return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255); +} + MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const { return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255); } @@ -758,6 +972,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const { return createRegOperand(AMDGPU::VReg_512RegClassID, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const { + return createRegOperand(AMDGPU::VReg_1024RegClassID, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { // table-gen generated disassembler doesn't care about operand types // leaving only registry class so SSrc_32 operand turns into SReg_32 @@ -914,8 +1132,10 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) { case OPW128: // splat constants case OPW512: case OPW1024: + case OPWV232: return MCOperand::createImm(getInlineImmVal32(Imm)); case OPW64: + case OPW256: return MCOperand::createImm(getInlineImmVal64(Imm)); case OPW16: case OPWV216: @@ -935,8 +1155,14 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const { case OPW16: case OPWV216: return VGPR_32RegClassID; - case OPW64: return VReg_64RegClassID; + case OPW64: + case OPWV232: return VReg_64RegClassID; + case OPW96: return VReg_96RegClassID; case OPW128: return VReg_128RegClassID; + case OPW160: return VReg_160RegClassID; + case OPW256: return VReg_256RegClassID; + case OPW512: return VReg_512RegClassID; + case OPW1024: return VReg_1024RegClassID; } } @@ -950,8 +1176,11 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const { case OPW16: case OPWV216: return AGPR_32RegClassID; - case OPW64: return AReg_64RegClassID; + case OPW64: + case OPWV232: return AReg_64RegClassID; + case OPW96: return AReg_96RegClassID; case OPW128: return AReg_128RegClassID; + case OPW160: return AReg_160RegClassID; case OPW256: return AReg_256RegClassID; case OPW512: return AReg_512RegClassID; case OPW1024: return AReg_1024RegClassID; @@ -969,8 +1198,11 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const { case OPW16: case OPWV216: return SGPR_32RegClassID; - case OPW64: return SGPR_64RegClassID; + case OPW64: + case OPWV232: return SGPR_64RegClassID; + case OPW96: return SGPR_96RegClassID; case OPW128: return SGPR_128RegClassID; + case OPW160: return SGPR_160RegClassID; case OPW256: return SGPR_256RegClassID; case OPW512: return SGPR_512RegClassID; } @@ -986,7 +1218,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const { case OPW16: case OPWV216: return TTMP_32RegClassID; - case OPW64: return TTMP_64RegClassID; + case OPW64: + case OPWV232: return TTMP_64RegClassID; case OPW128: return TTMP_128RegClassID; case OPW256: return TTMP_256RegClassID; case OPW512: return TTMP_512RegClassID; @@ -1040,6 +1273,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c case OPWV216: return decodeSpecialReg32(Val); case OPW64: + case OPWV232: return decodeSpecialReg64(Val); default: llvm_unreachable("unexpected immediate type"); @@ -1209,6 +1443,10 @@ bool AMDGPUDisassembler::isVI() const { bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); } +bool AMDGPUDisassembler::isGFX90A() const { + return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]; +} + bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); } bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); } @@ -1217,6 +1455,10 @@ bool AMDGPUDisassembler::isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI); } +bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { + return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; +} + //===----------------------------------------------------------------------===// // AMDGPU specific symbol handling //===----------------------------------------------------------------------===// @@ -1276,7 +1518,8 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + if (!hasArchitectedFlatScratch()) + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; @@ -1327,9 +1570,12 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { using namespace amdhsa; StringRef Indent = "\t"; - PRINT_DIRECTIVE( - ".amdhsa_system_sgpr_private_segment_wavefront_offset", - COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); + if (hasArchitectedFlatScratch()) + PRINT_DIRECTIVE(".amdhsa_enable_private_segment", + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); + else + PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", @@ -1387,7 +1633,6 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( uint16_t TwoByteBuffer = 0; uint32_t FourByteBuffer = 0; - uint64_t EightByteBuffer = 0; StringRef ReservedBytes; StringRef Indent = "\t"; @@ -1408,11 +1653,19 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( << FourByteBuffer << '\n'; return MCDisassembler::Success; + case amdhsa::KERNARG_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_kernarg_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + case amdhsa::RESERVED0_OFFSET: - // 8 reserved bytes, must be 0. - EightByteBuffer = DE.getU64(Cursor); - if (EightByteBuffer) { - return MCDisassembler::Fail; + // 4 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 4); + for (int I = 0; I < 4; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } } return MCDisassembler::Success; @@ -1463,8 +1716,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( using namespace amdhsa; TwoByteBuffer = DE.getU16(Cursor); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + if (!hasArchitectedFlatScratch()) + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", @@ -1473,8 +1727,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + if (!hasArchitectedFlatScratch()) + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); @@ -1589,6 +1844,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, Inst.addOperand(MCOperand::createExpr(Add)); return true; } + // Add to list of referenced addresses, so caller can synthesize a label. + ReferencedAddresses.push_back(static_cast<uint64_t>(Value)); return false; } |
