about summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2023-12-17 20:41:09 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2023-12-17 20:41:09 +0000
commit312c0ed19cc5276a17bacf2120097bec4515b0f1 (patch)
treee6e4a4163840b73ba54bb0d3b70ee4899e4b7434 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
parentb1c73532ee8997fe5dfbeb7d223027bdf99758a0 (diff)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 133
1 file changed, 104 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 092845d391a3..3b69a37728ea 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -373,7 +373,7 @@ public:
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
- bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
+ bool isOffset() const { return isImmTy(ImmTyOffset); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
@@ -893,6 +893,7 @@ public:
bool isSDelayALU() const;
bool isHwreg() const;
bool isSendMsg() const;
+ bool isSplitBarrier() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMEMOffset() const;
@@ -1665,6 +1666,7 @@ private:
SMLoc getInstLoc(const OperandVector &Operands) const;
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
+ bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
@@ -1856,6 +1858,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2185,7 +2188,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_KIMM32:
- case AMDGPU::OPERAND_KIMM16: {
+ case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -2226,6 +2230,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+ case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -2570,7 +2575,7 @@ static bool isRegularReg(RegisterKind Kind) {
static const RegInfo* getRegularRegInfo(StringRef Str) {
for (const RegInfo &Reg : RegularRegisters)
- if (Str.startswith(Reg.Name))
+ if (Str.starts_with(Reg.Name))
return &Reg;
return nullptr;
}
@@ -2630,7 +2635,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// SGPR and TTMP registers must be aligned.
// Max required alignment is 4 dwords.
- AlignSize = std::min(RegWidth / 32, 4u);
+ AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
}
if (RegNum % AlignSize != 0) {
@@ -3411,12 +3416,16 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHLREV_B64_e32_gfx12:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx12:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHRREV_B64_e64_gfx12:
case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHRREV_I64_e64_gfx11:
+ case AMDGPU::V_ASHRREV_I64_e64_gfx12:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHR_B64_e64:
case AMDGPU::V_ASHR_I64_e64:
@@ -3571,8 +3580,12 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
: MCRegister::NoRegister;
};
+ // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+
const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
- auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
+ auto InvalidCompOprIdx =
+ InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
if (!InvalidCompOprIdx)
return true;
@@ -4131,6 +4144,40 @@ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
return getLoc();
}
+bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
+ const OperandVector &Operands) {
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+ if (OpNum == -1)
+ return true;
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::FLAT))
+ return validateFlatOffset(Inst, Operands);
+
+ if ((TSFlags & SIInstrFlags::SMRD))
+ return validateSMEMOffset(Inst, Operands);
+
+ const auto &Op = Inst.getOperand(OpNum);
+ if (isGFX12Plus() &&
+ (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
+ const unsigned OffsetSize = 24;
+ if (!isIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
+ return false;
+ }
+ } else {
+ const unsigned OffsetSize = 16;
+ if (!isUIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
+ return false;
+ }
+ }
+ return true;
+}
+
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
const OperandVector &Operands) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
@@ -4148,11 +4195,12 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
return false;
}
- // For FLAT segment the offset must be positive;
+ // For pre-GFX12 FLAT instructions the offset must be positive;
// MSB is ignored and forced to zero.
unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
bool AllowNegative =
- TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
+ (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
+ isGFX12Plus();
if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
Error(getFlatOffsetLoc(Operands),
Twine("expected a ") +
@@ -4479,7 +4527,7 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
SMLoc BLGPLoc = getBLGPLoc(Operands);
if (!BLGPLoc.isValid())
return true;
- bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
+ bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
auto FB = getFeatureBits();
bool UsesNeg = false;
if (FB[AMDGPU::FeatureGFX940Insts]) {
@@ -4788,10 +4836,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMovrels(Inst, Operands)) {
return false;
}
- if (!validateFlatOffset(Inst, Operands)) {
- return false;
- }
- if (!validateSMEMOffset(Inst, Operands)) {
+ if (!validateOffset(Inst, Operands)) {
return false;
}
if (!validateMAIAccWrite(Inst, Operands)) {
@@ -5334,11 +5379,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
+ if (IVersion.Major >= 12)
+ return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
- Val, ValRange);
+ if (IVersion.Major >= 12)
+ return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
@@ -5401,6 +5452,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
Val, ValRange);
+ } else if (ID == ".amdhsa_round_robin_scheduling") {
+ if (IVersion.Major < 12)
+ return Error(IDRange.Start, "directive requires gfx12+", IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
+ ValRange);
} else {
return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
}
@@ -5554,6 +5611,18 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
Lex();
+ if (ID == "enable_dx10_clamp") {
+ if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
+ isGFX12Plus())
+ return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
+ }
+
+ if (ID == "enable_ieee_mode") {
+ if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
+ isGFX12Plus())
+ return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
+ }
+
if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
@@ -5974,20 +6043,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
setForcedDPP(false);
setForcedSDWA(false);
- if (Name.endswith("_e64_dpp")) {
+ if (Name.ends_with("_e64_dpp")) {
setForcedDPP(true);
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 8);
- } else if (Name.endswith("_e64")) {
+ } else if (Name.ends_with("_e64")) {
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_e32")) {
+ } else if (Name.ends_with("_e32")) {
setForcedEncodingSize(32);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_dpp")) {
+ } else if (Name.ends_with("_dpp")) {
setForcedDPP(true);
return Name.substr(0, Name.size() - 4);
- } else if (Name.endswith("_sdwa")) {
+ } else if (Name.ends_with("_sdwa")) {
setForcedSDWA(true);
return Name.substr(0, Name.size() - 5);
}
@@ -6010,7 +6079,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
- bool IsMIMG = Name.startswith("image_");
+ bool IsMIMG = Name.starts_with("image_");
while (!trySkipToken(AsmToken::EndOfStatement)) {
OperandMode Mode = OperandMode_Default;
@@ -6150,7 +6219,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling) const {
Disabling = Id.consume_front("no");
- if (isGFX940() && !Mnemo.startswith("s_")) {
+ if (isGFX940() && !Mnemo.starts_with("s_")) {
return StringSwitch<unsigned>(Id)
.Case("nt", AMDGPU::CPol::NT)
.Case("sc0", AMDGPU::CPol::SC0)
@@ -6282,13 +6351,13 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
Value == "TH_LOAD_NT_WB") {
return Error(StringLoc, "invalid th value");
- } else if (Value.startswith("TH_ATOMIC_")) {
+ } else if (Value.starts_with("TH_ATOMIC_")) {
Value = Value.drop_front(10);
TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
- } else if (Value.startswith("TH_LOAD_")) {
+ } else if (Value.starts_with("TH_LOAD_")) {
Value = Value.drop_front(8);
TH = AMDGPU::CPol::TH_TYPE_LOAD;
- } else if (Value.startswith("TH_STORE_")) {
+ } else if (Value.starts_with("TH_STORE_")) {
Value = Value.drop_front(9);
TH = AMDGPU::CPol::TH_TYPE_STORE;
} else {
@@ -6733,7 +6802,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
- bool Sat = CntName.endswith("_sat");
+ bool Sat = CntName.ends_with("_sat");
if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
@@ -7206,7 +7275,7 @@ ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
if (!parseId(Str))
return ParseStatus::NoMatch;
- if (!Str.startswith("attr"))
+ if (!Str.starts_with("attr"))
return Error(S, "invalid interpolation attribute");
StringRef Chan = Str.take_back(2);
@@ -7297,7 +7366,7 @@ bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
if (isToken(AsmToken::Identifier)) {
StringRef Tok = getTokenStr();
- if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
+ if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
lex();
return true;
}
@@ -8446,7 +8515,7 @@ bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
Token += Suffix;
StringRef DimId = Token;
- if (DimId.startswith("SQ_RSRC_IMG_"))
+ if (DimId.starts_with("SQ_RSRC_IMG_"))
DimId = DimId.drop_front(12);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
@@ -9129,3 +9198,9 @@ bool AMDGPUOperand::isWaitVDST() const {
bool AMDGPUOperand::isWaitEXP() const {
return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
+
+//===----------------------------------------------------------------------===//
+// Split Barrier
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }