diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc')
8 files changed, 274 insertions, 72 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 44109b9d2919..f91f36ed851b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -28,7 +28,7 @@ namespace { class AMDGPUAsmBackend : public MCAsmBackend { public: - AMDGPUAsmBackend(const Target &T) : MCAsmBackend(support::little) {} + AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {} unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; }; @@ -53,7 +53,8 @@ public: std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target) override; + const MCValue &Target, + const MCSubtargetInfo *STI) override; }; } //End anonymous namespace @@ -185,12 +186,15 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } bool AMDGPUAsmBackend::shouldForceRelocation(const MCAssembler &, const MCFixup &Fixup, - const MCValue &) { + const MCValue &, + const MCSubtargetInfo *STI) { return Fixup.getKind() >= FirstLiteralRelocationKind; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 3f188478ca8b..58eed81e0755 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AMDGPU_REL32_HI; case MCSymbolRefExpr::VK_AMDGPU_REL64: return ELF::R_AMDGPU_REL64; + case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO: + return ELF::R_AMDGPU_ABS32_LO; + case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI: + return ELF::R_AMDGPU_ABS32_HI; } MCFixupKind Kind = Fixup.getKind(); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index ad55c73b22ea..edc244db613d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -10,13 +10,13 @@ #include "AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" -#include "SIRegisterInfo.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/TargetParser/TargetParser.h" @@ -24,12 +24,6 @@ using namespace llvm; using namespace llvm::AMDGPU; -static cl::opt<bool> Keep16BitSuffixes( - "amdgpu-keep-16-bit-reg-suffixes", - cl::desc("Keep .l and .h suffixes in asm for debugging purposes"), - cl::init(false), - cl::ReallyHidden); - void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { // FIXME: The current implementation of // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this @@ -103,28 +97,36 @@ void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - uint16_t Imm = MI->getOperand(OpNo).getImm(); + uint32_t Imm = MI->getOperand(OpNo).getImm(); if (Imm != 0) { O << " offset:"; - printU16ImmDecOperand(MI, OpNo, O); + + // GFX12 uses a 24-bit signed offset for VBUFFER. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF); + if (AMDGPU::isGFX12(STI) && IsVBuffer) + O << formatDec(SignExtend32<24>(Imm)); + else + printU16ImmDecOperand(MI, OpNo, O); } } void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - uint16_t Imm = MI->getOperand(OpNo).getImm(); + uint32_t Imm = MI->getOperand(OpNo).getImm(); if (Imm != 0) { O << " offset:"; const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - bool IsFlatSeg = !(Desc.TSFlags & - (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)); + bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal | + SIInstrFlags::FlatScratch)) || + AMDGPU::isGFX12(STI); - if (IsFlatSeg) // Unsigned offset - printU16ImmDecOperand(MI, OpNo, O); - else // Signed offset + if (AllowNegative) // Signed offset O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI))); + else // Unsigned offset + printU16ImmDecOperand(MI, OpNo, O); } } @@ -174,6 +176,17 @@ void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { auto Imm = MI->getOperand(OpNo).getImm(); + + if (AMDGPU::isGFX12Plus(STI)) { + const int64_t TH = Imm & CPol::TH; + const int64_t Scope = Imm & CPol::SCOPE; + + printTH(MI, TH, Scope, O); + printScope(Scope, O); + + return; + } + if (Imm & CPol::GLC) O << ((AMDGPU::isGFX940(STI) && !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0" @@ -188,6 +201,89 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, O << " /* unexpected cache policy bit */"; } +void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope, + raw_ostream &O) { + // For th = 0 do not print this field + if (TH == 0) + return; + + const unsigned Opcode = MI->getOpcode(); + const MCInstrDesc &TID = MII.get(Opcode); + bool IsStore = TID.mayStore(); + bool IsAtomic = + TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); + + O << " th:"; + + if (IsAtomic) { + O << "TH_ATOMIC_"; + if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) { + if (Scope >= AMDGPU::CPol::SCOPE_DEV) + O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT"); + else + O << formatHex(TH); + } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT) + O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : ""); + else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN) + O << "RETURN"; + else + O << formatHex(TH); + } else { + if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED) + O << formatHex(TH); + else { + // This will default to printing load variants when neither MayStore nor + // MayLoad flag is present which is the case with instructions like + // image_get_resinfo. + O << (IsStore ? "TH_STORE_" : "TH_LOAD_"); + switch (TH) { + case AMDGPU::CPol::TH_NT: + O << "NT"; + break; + case AMDGPU::CPol::TH_HT: + O << "HT"; + break; + case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB + O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS" + : (IsStore ? "RT_WB" : "LU")); + break; + case AMDGPU::CPol::TH_NT_RT: + O << "NT_RT"; + break; + case AMDGPU::CPol::TH_RT_NT: + O << "RT_NT"; + break; + case AMDGPU::CPol::TH_NT_HT: + O << "NT_HT"; + break; + case AMDGPU::CPol::TH_NT_WB: + O << "NT_WB"; + break; + default: + llvm_unreachable("unexpected th value"); + } + } + } +} + +void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) { + if (Scope == CPol::SCOPE_CU) + return; + + O << " scope:"; + + if (Scope == CPol::SCOPE_SE) + O << "SCOPE_SE"; + else if (Scope == CPol::SCOPE_DEV) + O << "SCOPE_DEV"; + else if (Scope == CPol::SCOPE_SYS) + O << "SCOPE_SYS"; + else + llvm_unreachable("unexpected scope policy value"); + + return; +} + void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNo).getImm()) { @@ -278,12 +374,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } #endif - StringRef RegName(getRegisterName(RegNo)); - if (!Keep16BitSuffixes) - if (!RegName.consume_back(".l")) - RegName.consume_back(".h"); - - O << RegName; + O << getRegisterName(RegNo); } void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, @@ -333,6 +424,15 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12: + case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12: printDefaultVccOperand(false, STI, O); break; } @@ -437,7 +537,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O) { + raw_ostream &O, bool IsFP) { int64_t SImm = static_cast<int64_t>(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -465,7 +565,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, else if (Imm == 0x3fc45f306dc9c882 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) O << "0.15915494309189532"; - else { + else if (IsFP) { + assert(AMDGPU::isValid32BitLiteral(Imm, true)); + O << formatHex(static_cast<uint64_t>(Hi_32(Imm))); + } else { assert(isUInt<32>(Imm) || isInt<32>(Imm)); // In rare situations, we will have a 32-bit literal in a 64-bit @@ -532,21 +635,15 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand, void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - uint8_t Imm = MI->getOperand(OpNo).getImm(); - if (Imm != 0) { - O << " wait_vdst:"; - printU4ImmDecOperand(MI, OpNo, O); - } + O << " wait_vdst:"; + printU4ImmDecOperand(MI, OpNo, O); } void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - uint8_t Imm = MI->getOperand(OpNo).getImm(); - if (Imm != 0) { - O << " wait_exp:"; - printU4ImmDecOperand(MI, OpNo, O); - } + O << " wait_exp:"; + printU4ImmDecOperand(MI, OpNo, O); } bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc, @@ -619,14 +716,17 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: case MCOI::OPERAND_IMMEDIATE: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: printImmediate32(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_IMM_INT64: - case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: + printImmediate64(Op.getImm(), STI, O, false); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - printImmediate64(Op.getImm(), STI, O); + printImmediate64(Op.getImm(), STI, O, true); break; case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: @@ -688,7 +788,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, if (RCBits == 32) printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O); else if (RCBits == 64) - printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O); + printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true); else llvm_unreachable("Invalid register class size"); } @@ -725,6 +825,18 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11: + case AMDGPU::V_CNDMASK_B32_e32_gfx12: + case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12: + case AMDGPU::V_CNDMASK_B32_dpp_gfx12: + case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx12: + case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12: + case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12: + case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12: case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7: case AMDGPU::V_CNDMASK_B32_e32_vi: @@ -846,13 +958,9 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo, unsigned Imm = MI->getOperand(OpNo).getImm(); const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src0); - if (Src0Idx >= 0 && - Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID && - !AMDGPU::isLegal64BitDPPControl(Imm)) { - O << " /* 64 bit dpp only supports row_newbcast */"; + if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) { + O << " /* DP ALU dpp only supports row_newbcast */"; return; } else if (Imm <= DppCtrl::QUAD_PERM_LAST) { O << "quad_perm:["; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 3b14faab136b..95c26de6299e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -66,6 +66,8 @@ private: const MCSubtargetInfo &STI, raw_ostream &O); void printCPol(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printTH(const MCInst *MI, int64_t TH, int64_t Scope, raw_ostream &O); + void printScope(int64_t Scope, raw_ostream &O); void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -91,7 +93,7 @@ private: void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O); + raw_ostream &O, bool IsFP); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printRegularOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index 5e77a8caa04e..b403d69d9ff1 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -49,6 +49,14 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + void getMachineOpValueT16(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void getMachineOpValueT16Lo128(const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + /// Use a fixup to encode the simm16 field for SOPP branch /// instructions. void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op, @@ -254,6 +262,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO, case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: + case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: return getLit32Encoding(static_cast<uint32_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_INT64: @@ -345,7 +354,8 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, // However, dst is encoded as EXEC for compatibility with SP3. if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) { assert((Encoding & 0xFF) == 0); - Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO); + Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO) & + AMDGPU::HWEncoding::REG_IDX_MASK; } for (unsigned i = 0; i < bytes; i++) { @@ -403,7 +413,10 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value. llvm_unreachable("Must be immediate or expr"); - support::endian::write<uint32_t>(CB, Imm, support::endianness::little); + if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64) + Imm = Hi_32(Imm); + + support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little); // Only one literal value allowed break; @@ -488,11 +501,14 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding( const MCInst &MI, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { unsigned Reg = MI.getOperand(OpNo).getReg(); - uint64_t Enc = MRI.getEncodingValue(Reg); + unsigned Enc = MRI.getEncodingValue(Reg); + unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; + bool IsVGPROrAGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR; // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma // instructions use acc[0:1] modifier bits to distinguish. These bits are // encoded as a virtual 9th bit of the register for these operands. + bool IsAGPR = false; if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) || @@ -507,9 +523,9 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding( MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg)) - Enc |= 512; + IsAGPR = true; - Op = Enc; + Op = Idx | (IsVGPROrAGPR << 8) | (IsAGPR << 9); } static bool needsPCRel(const MCExpr *Expr) { @@ -540,13 +556,38 @@ void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { if (MO.isReg()){ - Op = MRI.getEncodingValue(MO.getReg()); + unsigned Enc = MRI.getEncodingValue(MO.getReg()); + unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; + bool IsVGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR; + Op = Idx | (IsVGPR << 8); return; } unsigned OpNo = &MO - MI.begin(); getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI); } +void AMDGPUMCCodeEmitter::getMachineOpValueT16( + const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { + llvm_unreachable("TODO: Implement getMachineOpValueT16()."); +} + +void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128( + const MCInst &MI, unsigned OpNo, APInt &Op, + SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg()) { + uint16_t Encoding = MRI.getEncodingValue(MO.getReg()); + unsigned RegIdx = Encoding & AMDGPU::HWEncoding::REG_IDX_MASK; + bool IsHi = Encoding & AMDGPU::HWEncoding::IS_HI; + bool IsVGPR = Encoding & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR; + assert((!IsVGPR || isUInt<7>(RegIdx)) && "VGPR0-VGPR127 expected!"); + Op = (IsVGPR ? 0x100 : 0) | (IsHi ? 0x80 : 0) | RegIdx; + return; + } + getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI); +} + void AMDGPUMCCodeEmitter::getMachineOpValueCommon( const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 1bd3cdc67800..a855cf585205 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -66,8 +66,8 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { AMDGPU::GPUKind AK; + // clang-format off switch (ElfMach) { - default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type"); case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; @@ -126,8 +126,12 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; + default: AK = GK_NONE; break; } + // clang-format on StringRef GPUName = getArchNameAMDGCN(AK); if (GPUName != "") @@ -140,6 +144,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { if (AK == AMDGPU::GPUKind::GK_NONE) AK = parseArchR600(GPU); + // clang-format off switch (AK) { case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; @@ -199,8 +204,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150; case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; + case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; + case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } + // clang-format on llvm_unreachable("unknown GPU"); } @@ -368,6 +376,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + if (hasKernargPreload(STI)) { + PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD, + kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH); + PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD, + kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET); + } PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); @@ -418,9 +432,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( switch (CodeObjectVersion) { default: break; - case AMDGPU::AMDHSA_COV2: - break; - case AMDGPU::AMDHSA_COV3: case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: if (getTargetID()->isXnackSupported()) @@ -440,16 +451,16 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); - PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); - PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + if (IVersion.Major < 12) { + PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); + PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); + } if (IVersion.Major >= 9) PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); if (AMDGPU::isGFX90A(STI)) PRINT_FIELD(OS, ".amdhsa_tg_split", KD, compute_pgm_rsrc3, @@ -457,16 +468,19 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( if (IVersion.Major >= 10) { PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); } + if (IVersion.Major >= 12) + PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN); PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, compute_pgm_rsrc2, @@ -539,7 +553,7 @@ void AMDGPUTargetELFStreamer::EmitNote( unsigned NoteFlags = 0; // TODO Apparently, this is currently needed for OpenCL as mentioned in // https://reviews.llvm.org/D74995 - if (STI.getTargetTriple().getOS() == Triple::AMDHSA) + if (isHsaAbi(STI)) NoteFlags = ELF::SHF_ALLOC; S.pushSection(); @@ -598,11 +612,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { } unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { - assert(STI.getTargetTriple().getOS() == Triple::AMDHSA); + assert(isHsaAbi(STI)); if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) { switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return getEFlagsV3(); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: @@ -827,6 +840,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata( return true; } +bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( + const MCSubtargetInfo &STI) { + for (int i = 0; i < 64; ++i) { + OS << "\ts_nop 0\n"; + } + return true; +} + +bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader( + const MCSubtargetInfo &STI) { + const uint32_t Encoded_s_nop = 0xbf800000; + MCStreamer &OS = getStreamer(); + for (int i = 0; i < 64; ++i) { + OS.emitInt32(Encoded_s_nop); + } + return true; +} + bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { const uint32_t Encoded_s_code_end = 0xbf9f0000; const uint32_t Encoded_s_nop = 0xbf800000; @@ -906,6 +937,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); Streamer.emitInt16(KernelDescriptor.kernel_code_properties); - for (uint8_t Res : KernelDescriptor.reserved2) + Streamer.emitInt16(KernelDescriptor.kernarg_preload); + for (uint8_t Res : KernelDescriptor.reserved3) Streamer.emitInt8(Res); } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index db43de8fcc5f..55b5246c9210 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -90,6 +90,11 @@ public: /// \returns True on success, false on failure. virtual bool EmitCodeEnd(const MCSubtargetInfo &STI) { return true; } + /// \returns True on success, false on failure. + virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) { + return true; + } + virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, @@ -154,6 +159,9 @@ public: /// \returns True on success, false on failure. bool EmitCodeEnd(const MCSubtargetInfo &STI) override; + /// \returns True on success, false on failure. + bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override; + void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, @@ -215,6 +223,9 @@ public: /// \returns True on success, false on failure. bool EmitCodeEnd(const MCSubtargetInfo &STI) override; + /// \returns True on success, false on failure. + bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override; + void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index bbbfbe4faa0f..6c539df7677e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -142,11 +142,11 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, } void R600MCCodeEmitter::emit(uint32_t Value, SmallVectorImpl<char> &CB) const { - support::endian::write(CB, Value, support::little); + support::endian::write(CB, Value, llvm::endianness::little); } void R600MCCodeEmitter::emit(uint64_t Value, SmallVectorImpl<char> &CB) const { - support::endian::write(CB, Value, support::little); + support::endian::write(CB, Value, llvm::endianness::little); } unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { |