aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp311
1 files changed, 284 insertions, 27 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 8061c6c509e0..fe62b8590fa0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -123,6 +123,7 @@ DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)
DECODE_OPERAND_REG(VReg_512)
+DECODE_OPERAND_REG(VReg_1024)
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
@@ -135,7 +136,9 @@ DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)
DECODE_OPERAND_REG(AGPR_32)
+DECODE_OPERAND_REG(AReg_64)
DECODE_OPERAND_REG(AReg_128)
+DECODE_OPERAND_REG(AReg_256)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
@@ -157,6 +160,14 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
+static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
+}
+
static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
@@ -173,6 +184,14 @@ static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}
+static DecodeStatus decodeOperand_AReg_64(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
+}
+
static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
@@ -181,6 +200,14 @@ static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}
+static DecodeStatus decodeOperand_AReg_256(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
+}
+
static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
@@ -197,6 +224,127 @@ static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}
+static DecodeStatus decodeOperand_VReg_64(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
+}
+
+static DecodeStatus decodeOperand_VReg_128(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
+}
+
+static DecodeStatus decodeOperand_VReg_256(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
+}
+
+static DecodeStatus decodeOperand_VReg_512(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
+}
+
+static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
+}
+
+static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
+ const MCRegisterInfo *MRI) {
+ if (OpIdx < 0)
+ return false;
+
+ const MCOperand &Op = Inst.getOperand(OpIdx);
+ if (!Op.isReg())
+ return false;
+
+ unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+ auto Reg = Sub ? Sub : Op.getReg();
+ return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
+}
+
+static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst,
+ unsigned Imm,
+ AMDGPUDisassembler::OpWidthTy Opw,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ if (!DAsm->isGFX90A()) {
+ Imm &= 511;
+ } else {
+ // If an atomic has both vdata and vdst, their register classes are tied.
+ // The bit is decoded along with the vdst (the first operand), so we need
+ // to change the register class to AGPR if the vdst was an AGPR.
+ // If a DS instruction has both data0 and data1, their register classes
+ // are also tied.
+ unsigned Opc = Inst.getOpcode();
+ uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
+ uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
+ : AMDGPU::OpName::vdata;
+ const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
+ int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
+ if ((int)Inst.getNumOperands() == DataIdx) {
+ int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+ if (IsAGPROperand(Inst, DstIdx, MRI))
+ Imm |= 512;
+ }
+
+ if (TSFlags & SIInstrFlags::DS) {
+ int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
+ if ((int)Inst.getNumOperands() == Data2Idx &&
+ IsAGPROperand(Inst, DataIdx, MRI))
+ Imm |= 512;
+ }
+ }
+ return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
+}
+
+static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return decodeOperand_AVLdSt_Any(Inst, Imm,
+ AMDGPUDisassembler::OPW32, Decoder);
+}
+
+static DecodeStatus DecodeAVLdSt_64RegisterClass(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return decodeOperand_AVLdSt_Any(Inst, Imm,
+ AMDGPUDisassembler::OPW64, Decoder);
+}
+
+static DecodeStatus DecodeAVLdSt_96RegisterClass(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return decodeOperand_AVLdSt_Any(Inst, Imm,
+ AMDGPUDisassembler::OPW96, Decoder);
+}
+
+static DecodeStatus DecodeAVLdSt_128RegisterClass(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return decodeOperand_AVLdSt_Any(Inst, Imm,
+ AMDGPUDisassembler::OPW128, Decoder);
+}
+
static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
@@ -250,6 +398,9 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
return MCDisassembler::Fail;
}
+// The disassembler is greedy, so we need to check the FI operand value to
+// avoid parsing a dpp8 instruction when the correct literal is not set. For
+// dpp16 the autogenerated decoder checks the dpp literal.
static bool isValidDPP8(const MCInst &MI) {
using namespace llvm::AMDGPU::DPP;
int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
@@ -341,6 +492,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
if (Res) break;
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
+ Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
+ if (Res)
+ break;
+ }
+
if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
if (Res) break;
@@ -351,6 +508,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
+
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
+ Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
+ if (Res)
+ break;
+ }
+
Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
if (Res) break;
@@ -369,6 +533,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
@@ -379,9 +544,44 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
- insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
+ int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::cpol);
+ if (CPolPos != -1) {
+ unsigned CPol =
+ (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
+ AMDGPU::CPol::GLC : 0;
+ if (MI.getNumOperands() <= (unsigned)CPolPos) {
+ insertNamedMCOperand(MI, MCOperand::createImm(CPol),
+ AMDGPU::OpName::cpol);
+ } else if (CPol) {
+ MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
+ }
+ }
+ }
+
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
+ (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
+ // GFX90A lost TFE; its place is occupied by ACC.
+ int TFEOpIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+ if (TFEOpIdx != -1) {
+ auto TFEIter = MI.begin();
+ std::advance(TFEIter, TFEOpIdx);
+ MI.insert(TFEIter, MCOperand::createImm(0));
+ }
+ }
+
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
+ int SWZOpIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
+ if (SWZOpIdx != -1) {
+ auto SWZIter = MI.begin();
+ std::advance(SWZIter, SWZOpIdx);
+ MI.insert(SWZIter, MCOperand::createImm(0));
+ }
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
@@ -453,6 +653,8 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+// We must check FI == literal to reject non-genuine dpp8 instructions, and we
+// must first add the optional MI operands so that FI can be checked.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
@@ -513,21 +715,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
unsigned DimIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
+ int A16Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
const AMDGPU::MIMGDimInfo *Dim =
AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
+ const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
+
+ AddrSize =
+ AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
- AddrSize = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
- (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
if (!IsNSA) {
if (AddrSize > 8)
AddrSize = 16;
- else if (AddrSize > 4)
- AddrSize = 8;
} else {
if (AddrSize > Info->VAddrDwords) {
// The NSA encoding does not contain enough operands for the combination
@@ -545,7 +747,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DstSize = (DstSize + 1) / 2;
}
- if (MI.getOperand(TFEIdx).getImm())
+ if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
DstSize += 1;
if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
@@ -701,6 +903,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
return decodeSrcOp(OPWV216, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
+ return decodeSrcOp(OPWV232, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -718,10 +924,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
}
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
+ return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
+ return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}
@@ -758,6 +972,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
+ return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
// table-gen generated disassembler doesn't care about operand types
// leaving only registry class so SSrc_32 operand turns into SReg_32
@@ -914,8 +1132,10 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
case OPW128: // splat constants
case OPW512:
case OPW1024:
+ case OPWV232:
return MCOperand::createImm(getInlineImmVal32(Imm));
case OPW64:
+ case OPW256:
return MCOperand::createImm(getInlineImmVal64(Imm));
case OPW16:
case OPWV216:
@@ -935,8 +1155,14 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
case OPW16:
case OPWV216:
return VGPR_32RegClassID;
- case OPW64: return VReg_64RegClassID;
+ case OPW64:
+ case OPWV232: return VReg_64RegClassID;
+ case OPW96: return VReg_96RegClassID;
case OPW128: return VReg_128RegClassID;
+ case OPW160: return VReg_160RegClassID;
+ case OPW256: return VReg_256RegClassID;
+ case OPW512: return VReg_512RegClassID;
+ case OPW1024: return VReg_1024RegClassID;
}
}
@@ -950,8 +1176,11 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
case OPW16:
case OPWV216:
return AGPR_32RegClassID;
- case OPW64: return AReg_64RegClassID;
+ case OPW64:
+ case OPWV232: return AReg_64RegClassID;
+ case OPW96: return AReg_96RegClassID;
case OPW128: return AReg_128RegClassID;
+ case OPW160: return AReg_160RegClassID;
case OPW256: return AReg_256RegClassID;
case OPW512: return AReg_512RegClassID;
case OPW1024: return AReg_1024RegClassID;
@@ -969,8 +1198,11 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
case OPW16:
case OPWV216:
return SGPR_32RegClassID;
- case OPW64: return SGPR_64RegClassID;
+ case OPW64:
+ case OPWV232: return SGPR_64RegClassID;
+ case OPW96: return SGPR_96RegClassID;
case OPW128: return SGPR_128RegClassID;
+ case OPW160: return SGPR_160RegClassID;
case OPW256: return SGPR_256RegClassID;
case OPW512: return SGPR_512RegClassID;
}
@@ -986,7 +1218,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
case OPW16:
case OPWV216:
return TTMP_32RegClassID;
- case OPW64: return TTMP_64RegClassID;
+ case OPW64:
+ case OPWV232: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
case OPW256: return TTMP_256RegClassID;
case OPW512: return TTMP_512RegClassID;
@@ -1040,6 +1273,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
case OPWV216:
return decodeSpecialReg32(Val);
case OPW64:
+ case OPWV232:
return decodeSpecialReg64(Val);
default:
llvm_unreachable("unexpected immediate type");
@@ -1209,6 +1443,10 @@ bool AMDGPUDisassembler::isVI() const {
bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
+bool AMDGPUDisassembler::isGFX90A() const {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
+}
+
bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
@@ -1217,6 +1455,10 @@ bool AMDGPUDisassembler::isGFX10Plus() const {
return AMDGPU::isGFX10Plus(STI);
}
+bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
+ return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
@@ -1276,7 +1518,8 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
- KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
+ if (!hasArchitectedFlatScratch())
+ KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
@@ -1327,9 +1570,12 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
using namespace amdhsa;
StringRef Indent = "\t";
- PRINT_DIRECTIVE(
- ".amdhsa_system_sgpr_private_segment_wavefront_offset",
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
+ if (hasArchitectedFlatScratch())
+ PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
+ else
+ PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
@@ -1387,7 +1633,6 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
uint16_t TwoByteBuffer = 0;
uint32_t FourByteBuffer = 0;
- uint64_t EightByteBuffer = 0;
StringRef ReservedBytes;
StringRef Indent = "\t";
@@ -1408,11 +1653,19 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
<< FourByteBuffer << '\n';
return MCDisassembler::Success;
+ case amdhsa::KERNARG_SIZE_OFFSET:
+ FourByteBuffer = DE.getU32(Cursor);
+ KdStream << Indent << ".amdhsa_kernarg_size "
+ << FourByteBuffer << '\n';
+ return MCDisassembler::Success;
+
case amdhsa::RESERVED0_OFFSET:
- // 8 reserved bytes, must be 0.
- EightByteBuffer = DE.getU64(Cursor);
- if (EightByteBuffer) {
- return MCDisassembler::Fail;
+ // 4 reserved bytes, must be 0.
+ ReservedBytes = DE.getBytes(Cursor, 4);
+ for (int I = 0; I < 4; ++I) {
+ if (ReservedBytes[I] != 0) {
+ return MCDisassembler::Fail;
+ }
}
return MCDisassembler::Success;
@@ -1463,8 +1716,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
using namespace amdhsa;
TwoByteBuffer = DE.getU16(Cursor);
- PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ if (!hasArchitectedFlatScratch())
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
@@ -1473,8 +1727,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
- PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ if (!hasArchitectedFlatScratch())
+ PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
@@ -1589,6 +1844,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
Inst.addOperand(MCOperand::createExpr(Add));
return true;
}
+ // Add to list of referenced addresses, so caller can synthesize a label.
+ ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
return false;
}