diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 311 | 
1 files changed, 284 insertions, 27 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 8061c6c509e0..fe62b8590fa0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -123,6 +123,7 @@ DECODE_OPERAND_REG(VReg_96)  DECODE_OPERAND_REG(VReg_128)  DECODE_OPERAND_REG(VReg_256)  DECODE_OPERAND_REG(VReg_512) +DECODE_OPERAND_REG(VReg_1024)  DECODE_OPERAND_REG(SReg_32)  DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) @@ -135,7 +136,9 @@ DECODE_OPERAND_REG(SReg_256)  DECODE_OPERAND_REG(SReg_512)  DECODE_OPERAND_REG(AGPR_32) +DECODE_OPERAND_REG(AReg_64)  DECODE_OPERAND_REG(AReg_128) +DECODE_OPERAND_REG(AReg_256)  DECODE_OPERAND_REG(AReg_512)  DECODE_OPERAND_REG(AReg_1024)  DECODE_OPERAND_REG(AV_32) @@ -157,6 +160,14 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,    return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));  } +static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, +                                           unsigned Imm, +                                           uint64_t Addr, +                                           const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm)); +} +  static DecodeStatus decodeOperand_VS_16(MCInst &Inst,                                          unsigned Imm,                                          uint64_t Addr, @@ -173,6 +184,14 @@ static DecodeStatus decodeOperand_VS_32(MCInst &Inst,    return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));  } +static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, +                                          unsigned Imm, +                                          uint64_t Addr, +                                          const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512)); +} +  static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,                                             unsigned Imm,                                             uint64_t Addr, @@ -181,6 +200,14 @@ static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,    return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));  } +static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, +                                           unsigned Imm, +                                           uint64_t Addr, +                                           const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512)); +} +  static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,                                             unsigned Imm,                                             uint64_t Addr, @@ -197,6 +224,127 @@ static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,    return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));  } +static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, +                                          unsigned Imm, +                                          uint64_t Addr, +                                          const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm)); +} + +static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, +                                           unsigned Imm, +                                           uint64_t Addr, +                                           const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm)); +} + +static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, +                                           unsigned Imm, +                                           uint64_t Addr, +                                           const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm)); +} + +static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, +                                           unsigned Imm, +                                           uint64_t Addr, +                                           const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm)); +} + +static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, +                                            unsigned Imm, +                                            uint64_t Addr, +                                            const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); +} + +static bool IsAGPROperand(const MCInst &Inst, int OpIdx, +                          const MCRegisterInfo *MRI) { +  if (OpIdx < 0) +    return false; + +  const MCOperand &Op = Inst.getOperand(OpIdx); +  if (!Op.isReg()) +    return false; + +  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); +  auto Reg = Sub ? Sub : Op.getReg(); +  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; +} + +static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, +                                             unsigned Imm, +                                             AMDGPUDisassembler::OpWidthTy Opw, +                                             const void *Decoder) { +  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); +  if (!DAsm->isGFX90A()) { +    Imm &= 511; +  } else { +    // If atomic has both vdata and vdst their register classes are tied. +    // The bit is decoded along with the vdst, first operand. We need to +    // change register class to AGPR if vdst was AGPR. +    // If a DS instruction has both data0 and data1 their register classes +    // are also tied. +    unsigned Opc = Inst.getOpcode(); +    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags; +    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 +                                                        : AMDGPU::OpName::vdata; +    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo(); +    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx); +    if ((int)Inst.getNumOperands() == DataIdx) { +      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); +      if (IsAGPROperand(Inst, DstIdx, MRI)) +        Imm |= 512; +    } + +    if (TSFlags & SIInstrFlags::DS) { +      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); +      if ((int)Inst.getNumOperands() == Data2Idx && +          IsAGPROperand(Inst, DataIdx, MRI)) +        Imm |= 512; +    } +  } +  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256)); +} + +static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst, +                                                 unsigned Imm, +                                                 uint64_t Addr, +                                                 const void *Decoder) { +  return decodeOperand_AVLdSt_Any(Inst, Imm, +                                  AMDGPUDisassembler::OPW32, Decoder); +} + +static DecodeStatus DecodeAVLdSt_64RegisterClass(MCInst &Inst, +                                                 unsigned Imm, +                                                 uint64_t Addr, +                                                 const void *Decoder) { +  return decodeOperand_AVLdSt_Any(Inst, Imm, +                                  AMDGPUDisassembler::OPW64, Decoder); +} + +static DecodeStatus DecodeAVLdSt_96RegisterClass(MCInst &Inst, +                                                 unsigned Imm, +                                                 uint64_t Addr, +                                                 const void *Decoder) { +  return decodeOperand_AVLdSt_Any(Inst, Imm, +                                  AMDGPUDisassembler::OPW96, Decoder); +} + +static DecodeStatus DecodeAVLdSt_128RegisterClass(MCInst &Inst, +                                                  unsigned Imm, +                                                  uint64_t Addr, +                                                  const void *Decoder) { +  return decodeOperand_AVLdSt_Any(Inst, Imm, +                                  AMDGPUDisassembler::OPW128, Decoder); +} +  static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,                                            unsigned Imm,                                            uint64_t Addr, @@ -250,6 +398,9 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,    return MCDisassembler::Fail;  } +// The disassembler is greedy, so we need to check FI operand value to +// not parse a dpp if the correct literal is not set. For dpp16 the +// autogenerated decoder checks the dpp literal  static bool isValidDPP8(const MCInst &MI) {    using namespace llvm::AMDGPU::DPP;    int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi); @@ -341,6 +492,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,      Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);      if (Res) break; +    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { +      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address); +      if (Res) +        break; +    } +      if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {        Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);        if (Res) break; @@ -351,6 +508,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,      if (Bytes.size() < 4) break;      const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW; + +    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { +      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address); +      if (Res) +        break; +    } +      Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);      if (Res) break; @@ -369,6 +533,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,                MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||                MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||                MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || +              MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||                MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||                MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||                MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || @@ -379,9 +544,44 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,    }    if (Res && (MCII->get(MI.getOpcode()).TSFlags & -                        (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) && -      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) { -    insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1); +          (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) { +    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), +                                             AMDGPU::OpName::cpol); +    if (CPolPos != -1) { +      unsigned CPol = +          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ? +              AMDGPU::CPol::GLC : 0; +      if (MI.getNumOperands() <= (unsigned)CPolPos) { +        insertNamedMCOperand(MI, MCOperand::createImm(CPol), +                             AMDGPU::OpName::cpol); +      } else if (CPol) { +        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol); +      } +    } +  } + +  if (Res && (MCII->get(MI.getOpcode()).TSFlags & +              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) && +             (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) { +    // GFX90A lost TFE, its place is occupied by ACC. +    int TFEOpIdx = +        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe); +    if (TFEOpIdx != -1) { +      auto TFEIter = MI.begin(); +      std::advance(TFEIter, TFEOpIdx); +      MI.insert(TFEIter, MCOperand::createImm(0)); +    } +  } + +  if (Res && (MCII->get(MI.getOpcode()).TSFlags & +              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) { +    int SWZOpIdx = +        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz); +    if (SWZOpIdx != -1) { +      auto SWZIter = MI.begin(); +      std::advance(SWZIter, SWZOpIdx); +      MI.insert(SWZIter, MCOperand::createImm(0)); +    }    }    if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { @@ -453,6 +653,8 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {    return MCDisassembler::Success;  } +// We must check FI == literal to reject not genuine dpp8 insts, and we must +// first add optional MI operands to check FI  DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {    unsigned Opc = MI.getOpcode();    unsigned DescNumOps = MCII->get(Opc).getNumOperands(); @@ -513,21 +715,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {    if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {      unsigned DimIdx =          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); +    int A16Idx = +        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);      const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =          AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);      const AMDGPU::MIMGDimInfo *Dim =          AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); +    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm()); + +    AddrSize = +        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI)); -    AddrSize = BaseOpcode->NumExtraArgs + -               (BaseOpcode->Gradients ? Dim->NumGradients : 0) + -               (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + -               (BaseOpcode->LodOrClampOrMip ? 1 : 0);      IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;      if (!IsNSA) {        if (AddrSize > 8)          AddrSize = 16; -      else if (AddrSize > 4) -        AddrSize = 8;      } else {        if (AddrSize > Info->VAddrDwords) {          // The NSA encoding does not contain enough operands for the combination @@ -545,7 +747,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {      DstSize = (DstSize + 1) / 2;    } -  if (MI.getOperand(TFEIdx).getImm()) +  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())      DstSize += 1;    if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) @@ -701,6 +903,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {    return decodeSrcOp(OPWV216, Val);  } +MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const { +  return decodeSrcOp(OPWV232, Val); +} +  MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {    // Some instructions have operand restrictions beyond what the encoding    // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra @@ -718,10 +924,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {    return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);  } +MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const { +  return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255); +} +  MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {    return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);  } +MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const { +  return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255); +} +  MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {    return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);  } @@ -758,6 +972,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {    return createRegOperand(AMDGPU::VReg_512RegClassID, Val);  } +MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const { +  return createRegOperand(AMDGPU::VReg_1024RegClassID, Val); +} +  MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {    // table-gen generated disassembler doesn't care about operand types    // leaving only registry class so SSrc_32 operand turns into SReg_32 @@ -914,8 +1132,10 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {    case OPW128: // splat constants    case OPW512:    case OPW1024: +  case OPWV232:      return MCOperand::createImm(getInlineImmVal32(Imm));    case OPW64: +  case OPW256:      return MCOperand::createImm(getInlineImmVal64(Imm));    case OPW16:    case OPWV216: @@ -935,8 +1155,14 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {    case OPW16:    case OPWV216:      return VGPR_32RegClassID; -  case OPW64: return VReg_64RegClassID; +  case OPW64: +  case OPWV232: return VReg_64RegClassID; +  case OPW96: return VReg_96RegClassID;    case OPW128: return VReg_128RegClassID; +  case OPW160: return VReg_160RegClassID; +  case OPW256: return VReg_256RegClassID; +  case OPW512: return VReg_512RegClassID; +  case OPW1024: return VReg_1024RegClassID;    }  } @@ -950,8 +1176,11 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {    case OPW16:    case OPWV216:      return AGPR_32RegClassID; -  case OPW64: return AReg_64RegClassID; +  case OPW64: +  case OPWV232: return AReg_64RegClassID; +  case OPW96: return AReg_96RegClassID;    case OPW128: return AReg_128RegClassID; +  case OPW160: return AReg_160RegClassID;    case OPW256: return AReg_256RegClassID;    case OPW512: return AReg_512RegClassID;    case OPW1024: return AReg_1024RegClassID; @@ -969,8 +1198,11 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {    case OPW16:    case OPWV216:      return SGPR_32RegClassID; -  case OPW64: return SGPR_64RegClassID; +  case OPW64: +  case OPWV232: return SGPR_64RegClassID; +  case OPW96: return SGPR_96RegClassID;    case OPW128: return SGPR_128RegClassID; +  case OPW160: return SGPR_160RegClassID;    case OPW256: return SGPR_256RegClassID;    case OPW512: return SGPR_512RegClassID;    } @@ -986,7 +1218,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {    case OPW16:    case OPWV216:      return TTMP_32RegClassID; -  case OPW64: return TTMP_64RegClassID; +  case OPW64: +  case OPWV232: return TTMP_64RegClassID;    case OPW128: return TTMP_128RegClassID;    case OPW256: return TTMP_256RegClassID;    case OPW512: return TTMP_512RegClassID; @@ -1040,6 +1273,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c    case OPWV216:      return decodeSpecialReg32(Val);    case OPW64: +  case OPWV232:      return decodeSpecialReg64(Val);    default:      llvm_unreachable("unexpected immediate type"); @@ -1209,6 +1443,10 @@ bool AMDGPUDisassembler::isVI() const {  bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); } +bool AMDGPUDisassembler::isGFX90A() const { +  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]; +} +  bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }  bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); } @@ -1217,6 +1455,10 @@ bool AMDGPUDisassembler::isGFX10Plus() const {    return AMDGPU::isGFX10Plus(STI);  } +bool AMDGPUDisassembler::hasArchitectedFlatScratch() const { +  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; +} +  //===----------------------------------------------------------------------===//  // AMDGPU specific symbol handling  //===----------------------------------------------------------------------===// @@ -1276,7 +1518,8 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(                            AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);    KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; -  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; +  if (!hasArchitectedFlatScratch()) +    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';    KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';    KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; @@ -1327,9 +1570,12 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(      uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {    using namespace amdhsa;    StringRef Indent = "\t"; -  PRINT_DIRECTIVE( -      ".amdhsa_system_sgpr_private_segment_wavefront_offset", -      COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); +  if (hasArchitectedFlatScratch()) +    PRINT_DIRECTIVE(".amdhsa_enable_private_segment", +                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); +  else +    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset", +                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);    PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",                    COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);    PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", @@ -1387,7 +1633,6 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(    uint16_t TwoByteBuffer = 0;    uint32_t FourByteBuffer = 0; -  uint64_t EightByteBuffer = 0;    StringRef ReservedBytes;    StringRef Indent = "\t"; @@ -1408,11 +1653,19 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(               << FourByteBuffer << '\n';      return MCDisassembler::Success; +  case amdhsa::KERNARG_SIZE_OFFSET: +    FourByteBuffer = DE.getU32(Cursor); +    KdStream << Indent << ".amdhsa_kernarg_size " +             << FourByteBuffer << '\n'; +    return MCDisassembler::Success; +    case amdhsa::RESERVED0_OFFSET: -    // 8 reserved bytes, must be 0. -    EightByteBuffer = DE.getU64(Cursor); -    if (EightByteBuffer) { -      return MCDisassembler::Fail; +    // 4 reserved bytes, must be 0. +    ReservedBytes = DE.getBytes(Cursor, 4); +    for (int I = 0; I < 4; ++I) { +      if (ReservedBytes[I] != 0) { +        return MCDisassembler::Fail; +      }      }      return MCDisassembler::Success; @@ -1463,8 +1716,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(      using namespace amdhsa;      TwoByteBuffer = DE.getU16(Cursor); -    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", -                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); +    if (!hasArchitectedFlatScratch()) +      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", +                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);      PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);      PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", @@ -1473,8 +1727,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);      PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); -    PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", -                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); +    if (!hasArchitectedFlatScratch()) +      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", +                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); @@ -1589,6 +1844,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,      Inst.addOperand(MCOperand::createExpr(Add));      return true;    } +  // Add to list of referenced addresses, so caller can synthesize a label. +  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));    return false;  }  | 
