author     Dimitry Andric <dim@FreeBSD.org>  2023-12-18 20:30:12 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2024-04-06 20:11:55 +0000
commit     5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree       1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc
parent     3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent     312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp      |  10
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp |   4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp     | 192
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h       |   4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp   |  53
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp  |  68
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h    |  11
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp     |   4
8 files changed, 274 insertions(+), 72 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 44109b9d2919..f91f36ed851b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -28,7 +28,7 @@ namespace {
class AMDGPUAsmBackend : public MCAsmBackend {
public:
- AMDGPUAsmBackend(const Target &T) : MCAsmBackend(support::little) {}
+ AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {}
unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
@@ -53,7 +53,8 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override;
+ const MCValue &Target,
+ const MCSubtargetInfo *STI) override;
};
} //End anonymous namespace
@@ -185,12 +186,15 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
bool AMDGPUAsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
- const MCValue &) {
+ const MCValue &,
+ const MCSubtargetInfo *STI) {
return Fixup.getKind() >= FirstLiteralRelocationKind;
}
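
Note: the added assert guards the table lookup that follows it. A minimal standalone sketch of the invariant; the table entry mirrors the real AMDGPU one, but the wrapper function is ours:

#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include <cassert>
#include <iterator>
using namespace llvm;

// Target fixup kinds are numbered upward from FirstTargetFixupKind, so
// Kind - FirstTargetFixupKind indexes a fixed per-target table; the
// assert rejects kinds past the end of that table.
static const MCFixupKindInfo Infos[] = {
    // name, offset (bits), size (bits), flags
    {"fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
};

static const MCFixupKindInfo &getInfo(MCFixupKind Kind) {
  assert(unsigned(Kind - FirstTargetFixupKind) < std::size(Infos) &&
         "Invalid kind!");
  return Infos[Kind - FirstTargetFixupKind];
}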
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 3f188478ca8b..58eed81e0755 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL32_HI;
case MCSymbolRefExpr::VK_AMDGPU_REL64:
return ELF::R_AMDGPU_REL64;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
+ return ELF::R_AMDGPU_ABS32_LO;
+ case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
+ return ELF::R_AMDGPU_ABS32_HI;
}
MCFixupKind Kind = Fixup.getKind();
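
Note: the two new cases map the @abs32@lo/@abs32@hi symbol-reference modifiers to their ELF relocation types. A hedged sketch of how such an expression is built on the emitter side; the symbol name is a placeholder and the MCContext is assumed to come from the caller:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

// Resolves to the low 32 bits of the symbol's absolute address; the
// writer above turns its variant kind into ELF::R_AMDGPU_ABS32_LO.
const MCExpr *makeAbs32Lo(MCContext &Ctx) {
  MCSymbol *Sym = Ctx.getOrCreateSymbol("lds_var"); // placeholder name
  return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_AMDGPU_ABS32_LO,
                                 Ctx);
}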
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ad55c73b22ea..edc244db613d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -10,13 +10,13 @@
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
-#include "SIRegisterInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -24,12 +24,6 @@
using namespace llvm;
using namespace llvm::AMDGPU;
-static cl::opt<bool> Keep16BitSuffixes(
- "amdgpu-keep-16-bit-reg-suffixes",
- cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
- cl::init(false),
- cl::ReallyHidden);
-
void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
// FIXME: The current implementation of
// AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
@@ -103,28 +97,36 @@ void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint16_t Imm = MI->getOperand(OpNo).getImm();
+ uint32_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
- printU16ImmDecOperand(MI, OpNo, O);
+
+ // GFX12 uses a 24-bit signed offset for VBUFFER.
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF);
+ if (AMDGPU::isGFX12(STI) && IsVBuffer)
+ O << formatDec(SignExtend32<24>(Imm));
+ else
+ printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint16_t Imm = MI->getOperand(OpNo).getImm();
+ uint32_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- bool IsFlatSeg = !(Desc.TSFlags &
- (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
+ bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal |
+ SIInstrFlags::FlatScratch)) ||
+ AMDGPU::isGFX12(STI);
- if (IsFlatSeg) // Unsigned offset
- printU16ImmDecOperand(MI, OpNo, O);
- else // Signed offset
+ if (AllowNegative) // Signed offset
O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
+ else // Unsigned offset
+ printU16ImmDecOperand(MI, OpNo, O);
}
}
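
Note: both printers now widen the operand to 32 bits before choosing signedness: GFX12 VBUFFER offsets are 24-bit signed, and flat global/scratch offsets take their width from getNumFlatOffsetBits(STI). A worked example of the sign extension, with illustrative values:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// GFX12 VBUFFER: a 24-bit field of all ones reads back as -1.
uint32_t Imm = 0xFFFFFF;
int32_t Off24 = llvm::SignExtend32<24>(Imm);     // -1, printed "offset:-1"
// Flat path: same idea with a runtime width; 13 bits is purely an example.
int32_t Off13 = llvm::SignExtend32(0x1000u, 13); // -4096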
@@ -174,6 +176,17 @@ void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
+
+ if (AMDGPU::isGFX12Plus(STI)) {
+ const int64_t TH = Imm & CPol::TH;
+ const int64_t Scope = Imm & CPol::SCOPE;
+
+ printTH(MI, TH, Scope, O);
+ printScope(Scope, O);
+
+ return;
+ }
+
if (Imm & CPol::GLC)
O << ((AMDGPU::isGFX940(STI) &&
!(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
@@ -188,6 +201,89 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " /* unexpected cache policy bit */";
}
+void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
+ raw_ostream &O) {
+ // For th = 0 do not print this field
+ if (TH == 0)
+ return;
+
+ const unsigned Opcode = MI->getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ O << " th:";
+
+ if (IsAtomic) {
+ O << "TH_ATOMIC_";
+ if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
+ if (Scope >= AMDGPU::CPol::SCOPE_DEV)
+ O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT");
+ else
+ O << formatHex(TH);
+ } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT)
+ O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : "");
+ else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN)
+ O << "RETURN";
+ else
+ O << formatHex(TH);
+ } else {
+ if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED)
+ O << formatHex(TH);
+ else {
+ // This will default to printing load variants when neither MayStore nor
+ // MayLoad flag is present which is the case with instructions like
+ // image_get_resinfo.
+ O << (IsStore ? "TH_STORE_" : "TH_LOAD_");
+ switch (TH) {
+ case AMDGPU::CPol::TH_NT:
+ O << "NT";
+ break;
+ case AMDGPU::CPol::TH_HT:
+ O << "HT";
+ break;
+ case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB
+ O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
+ : (IsStore ? "RT_WB" : "LU"));
+ break;
+ case AMDGPU::CPol::TH_NT_RT:
+ O << "NT_RT";
+ break;
+ case AMDGPU::CPol::TH_RT_NT:
+ O << "RT_NT";
+ break;
+ case AMDGPU::CPol::TH_NT_HT:
+ O << "NT_HT";
+ break;
+ case AMDGPU::CPol::TH_NT_WB:
+ O << "NT_WB";
+ break;
+ default:
+ llvm_unreachable("unexpected th value");
+ }
+ }
+ }
+}
+
+void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) {
+ if (Scope == CPol::SCOPE_CU)
+ return;
+
+ O << " scope:";
+
+ if (Scope == CPol::SCOPE_SE)
+ O << "SCOPE_SE";
+ else if (Scope == CPol::SCOPE_DEV)
+ O << "SCOPE_DEV";
+ else if (Scope == CPol::SCOPE_SYS)
+ O << "SCOPE_SYS";
+ else
+ llvm_unreachable("unexpected scope policy value");
+
+ return;
+}
+
void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
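
Note: on GFX12+ the cache-policy immediate is decoded into a temporal-hint field and a scope field before printing; the masks live in SIDefines.h. A minimal sketch with an illustrative operand value of zero, the fully-default policy:

using namespace llvm::AMDGPU;

int64_t Imm = 0;                          // cpol operand, illustrative
const int64_t TH = Imm & CPol::TH;        // temporal-hint bits -> 0
const int64_t Scope = Imm & CPol::SCOPE;  // scope bits -> SCOPE_CU
// printTH omits " th:..." when TH == 0 and printScope omits
// " scope:..." for the default SCOPE_CU, so this prints nothing.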
@@ -278,12 +374,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- StringRef RegName(getRegisterName(RegNo));
- if (!Keep16BitSuffixes)
- if (!RegName.consume_back(".l"))
- RegName.consume_back(".h");
-
- O << RegName;
+ O << getRegisterName(RegNo);
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
@@ -333,6 +424,15 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
printDefaultVccOperand(false, STI, O);
break;
}
@@ -437,7 +537,7 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ raw_ostream &O, bool IsFP) {
int64_t SImm = static_cast<int64_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -465,7 +565,10 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
else if (Imm == 0x3fc45f306dc9c882 &&
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494309189532";
- else {
+ else if (IsFP) {
+ assert(AMDGPU::isValid32BitLiteral(Imm, true));
+ O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
+ } else {
assert(isUInt<32>(Imm) || isInt<32>(Imm));
// In rare situations, we will have a 32-bit literal in a 64-bit
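
Note: a 64-bit FP operand that is not an inline constant can only carry a 32-bit literal, which forms the high half of the double (the low half is zero-filled); that is why the FP path prints Hi_32. A worked example with an illustrative constant:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

uint64_t Imm = 0x4037000000000000ULL;  // 23.0 as an IEEE-754 double
// Roughly what isValid32BitLiteral(Imm, /*IsFP64=*/true) requires here:
static_assert((0x4037000000000000ULL & 0xFFFFFFFFULL) == 0,
              "low half must be zero");
uint32_t Printed = llvm::Hi_32(Imm);   // 0x40370000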
@@ -532,21 +635,15 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_vdst:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- uint8_t Imm = MI->getOperand(OpNo).getImm();
- if (Imm != 0) {
- O << " wait_exp:";
- printU4ImmDecOperand(MI, OpNo, O);
- }
+ O << " wait_exp:";
+ printU4ImmDecOperand(MI, OpNo, O);
}
bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
@@ -619,14 +716,17 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case MCOI::OPERAND_IMMEDIATE:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
printImmediate32(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_INT64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ printImmediate64(Op.getImm(), STI, O, false);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- printImmediate64(Op.getImm(), STI, O);
+ printImmediate64(Op.getImm(), STI, O, true);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -688,7 +788,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
if (RCBits == 32)
printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
else if (RCBits == 64)
- printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
+ printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
else
llvm_unreachable("Invalid register class size");
}
@@ -725,6 +825,18 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_CNDMASK_B32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
@@ -846,13 +958,9 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
- !AMDGPU::isLegal64BitDPPControl(Imm)) {
- O << " /* 64 bit dpp only supports row_newbcast */";
+ if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
+ O << " /* DP ALU dpp only supports row_newbcast */";
return;
} else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
O << "quad_perm:[";
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3b14faab136b..95c26de6299e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -66,6 +66,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printTH(const MCInst *MI, int64_t TH, int64_t Scope, raw_ostream &O);
+ void printScope(int64_t Scope, raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -91,7 +93,7 @@ private:
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ raw_ostream &O, bool IsFP);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printRegularOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 5e77a8caa04e..b403d69d9ff1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -49,6 +49,14 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void getMachineOpValueT16(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getMachineOpValueT16Lo128(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// Use a fixup to encode the simm16 field for SOPP branch
/// instructions.
void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
@@ -254,6 +262,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -345,7 +354,8 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
// However, dst is encoded as EXEC for compatibility with SP3.
if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) {
assert((Encoding & 0xFF) == 0);
- Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO);
+ Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO) &
+ AMDGPU::HWEncoding::REG_IDX_MASK;
}
for (unsigned i = 0; i < bytes; i++) {
@@ -403,7 +413,10 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
} else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
llvm_unreachable("Must be immediate or expr");
- support::endian::write<uint32_t>(CB, Imm, support::endianness::little);
+ if (Desc.operands()[i].OperandType == AMDGPU::OPERAND_REG_IMM_FP64)
+ Imm = Hi_32(Imm);
+
+ support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
// Only one literal value allowed
break;
@@ -488,11 +501,14 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding(
const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
unsigned Reg = MI.getOperand(OpNo).getReg();
- uint64_t Enc = MRI.getEncodingValue(Reg);
+ unsigned Enc = MRI.getEncodingValue(Reg);
+ unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsVGPROrAGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
// VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
// instructions use acc[0:1] modifier bits to distinguish. These bits are
// encoded as a virtual 9th bit of the register for these operands.
+ bool IsAGPR = false;
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
@@ -507,9 +523,9 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding(
MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
- Enc |= 512;
+ IsAGPR = true;
- Op = Enc;
+ Op = Idx | (IsVGPROrAGPR << 8) | (IsAGPR << 9);
}
static bool needsPCRel(const MCExpr *Expr) {
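
Note: the AV-operand encoding is now assembled from named fields instead of OR-ing a raw 512 into the TableGen value: the low bits carry the register index, bit 8 marks a vector (VGPR or AGPR) operand, and bit 9 is the virtual acc bit that selects the AGPR file for mfma sources. A sketch with an illustrative register:

#include <cstdint>

unsigned Idx = 5;          // bits 7:0 - register index (a5 here)
bool IsVGPROrAGPR = true;  // bit 8   - vector operand
bool IsAGPR = true;        // bit 9   - acc bit selecting AGPRs
unsigned Op = Idx | (IsVGPROrAGPR << 8) | (IsAGPR << 9);  // 0x305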
@@ -540,13 +556,38 @@ void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MO.isReg()){
- Op = MRI.getEncodingValue(MO.getReg());
+ unsigned Enc = MRI.getEncodingValue(MO.getReg());
+ unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsVGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
+ Op = Idx | (IsVGPR << 8);
return;
}
unsigned OpNo = &MO - MI.begin();
getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
}
+void AMDGPUMCCodeEmitter::getMachineOpValueT16(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ llvm_unreachable("TODO: Implement getMachineOpValueT16().");
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ uint16_t Encoding = MRI.getEncodingValue(MO.getReg());
+ unsigned RegIdx = Encoding & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsHi = Encoding & AMDGPU::HWEncoding::IS_HI;
+ bool IsVGPR = Encoding & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
+ assert((!IsVGPR || isUInt<7>(RegIdx)) && "VGPR0-VGPR127 expected!");
+ Op = (IsVGPR ? 0x100 : 0) | (IsHi ? 0x80 : 0) | RegIdx;
+ return;
+ }
+ getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
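
Note: true-16 operands limited to v0-v127 pack three fields into nine bits, which is what the isUInt<7> assert protects: bit 8 selects VGPR, bit 7 selects the high 16-bit half, and bits 6:0 hold the index. A worked example with an illustrative register:

unsigned RegIdx = 127;  // v127, the highest index isUInt<7> allows
bool IsHi = true;       // the .h half of the 32-bit register
bool IsVGPR = true;
unsigned Op = (IsVGPR ? 0x100 : 0) | (IsHi ? 0x80 : 0) | RegIdx;  // 0x1ff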
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 1bd3cdc67800..a855cf585205 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -66,8 +66,8 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
+ // clang-format off
switch (ElfMach) {
- default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
@@ -126,8 +126,12 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ default: AK = GK_NONE; break;
}
+ // clang-format on
StringRef GPUName = getArchNameAMDGCN(AK);
if (GPUName != "")
@@ -140,6 +144,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
if (AK == AMDGPU::GPUKind::GK_NONE)
AK = parseArchR600(GPU);
+ // clang-format off
switch (AK) {
case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
@@ -199,8 +204,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
+ case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
+ case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
+ // clang-format on
llvm_unreachable("unknown GPU");
}
@@ -368,6 +376,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ if (hasKernargPreload(STI)) {
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD,
+ kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD,
+ kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET);
+ }
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
@@ -418,9 +432,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
switch (CodeObjectVersion) {
default:
break;
- case AMDGPU::AMDHSA_COV2:
- break;
- case AMDGPU::AMDHSA_COV3:
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
if (getTargetID()->isXnackSupported())
@@ -440,16 +451,16 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
- PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ if (IVersion.Major < 12) {
+ PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
+ PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
+ }
if (IVersion.Major >= 9)
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
compute_pgm_rsrc3,
@@ -457,16 +468,19 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 10) {
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
+ if (IVersion.Major >= 12)
+ PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
@@ -539,7 +553,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
- if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
+ if (isHsaAbi(STI))
NoteFlags = ELF::SHF_ALLOC;
S.pushSection();
@@ -598,11 +612,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
- assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
+ assert(isHsaAbi(STI));
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
@@ -827,6 +840,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI) {
+ for (int i = 0; i < 64; ++i) {
+ OS << "\ts_nop 0\n";
+ }
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
+ const MCSubtargetInfo &STI) {
+ const uint32_t Encoded_s_nop = 0xbf800000;
+ MCStreamer &OS = getStreamer();
+ for (int i = 0; i < 64; ++i) {
+ OS.emitInt32(Encoded_s_nop);
+ }
+ return true;
+}
+
bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
const uint32_t Encoded_s_nop = 0xbf800000;
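
Note: both streamers emit the same pad, 64 s_nop instructions of 4 bytes each, i.e. a 256-byte kernarg-preload header; reading it as space reserved ahead of the real kernel entry is our inference, not stated in the diff. A quick check of the arithmetic:

#include <cstdint>

const uint32_t Encoded_s_nop = 0xbf800000;  // s_nop 0, as in the code above
static_assert(64 * sizeof(uint32_t) == 256, "preload header is 256 bytes");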
@@ -906,6 +937,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
- for (uint8_t Res : KernelDescriptor.reserved2)
+ Streamer.emitInt16(KernelDescriptor.kernarg_preload);
+ for (uint8_t Res : KernelDescriptor.reserved3)
Streamer.emitInt8(Res);
}
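
Note: the kernel descriptor stays 64 bytes; the bytes formerly in reserved2 now begin with a 16-bit kernarg_preload field, with reserved3 covering the rest. A hedged reconstruction of the tail, with offsets inferred from the emission order above rather than spelled out in the diff:

#include <cstdint>

struct DescriptorTail {            // last 8 of the 64 descriptor bytes
  uint16_t kernel_code_properties; // offset 56
  uint16_t kernarg_preload;        // offset 58, new field
  uint8_t reserved3[4];            // offset 60, zeros
};
static_assert(sizeof(DescriptorTail) == 8, "tail is 8 bytes");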
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index db43de8fcc5f..55b5246c9210 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -90,6 +90,11 @@ public:
/// \returns True on success, false on failure.
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI) { return true; }
+ /// \returns True on success, false on failure.
+ virtual bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) {
+ return true;
+ }
+
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -154,6 +159,9 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
+ /// \returns True on success, false on failure.
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -215,6 +223,9 @@ public:
/// \returns True on success, false on failure.
bool EmitCodeEnd(const MCSubtargetInfo &STI) override;
+ /// \returns True on success, false on failure.
+ bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI) override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index bbbfbe4faa0f..6c539df7677e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -142,11 +142,11 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI,
}
void R600MCCodeEmitter::emit(uint32_t Value, SmallVectorImpl<char> &CB) const {
- support::endian::write(CB, Value, support::little);
+ support::endian::write(CB, Value, llvm::endianness::little);
}
void R600MCCodeEmitter::emit(uint64_t Value, SmallVectorImpl<char> &CB) const {
- support::endian::write(CB, Value, support::little);
+ support::endian::write(CB, Value, llvm::endianness::little);
}
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {