about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 607
1 files changed, 449 insertions, 158 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b9443559132f..092845d391a3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -75,6 +75,7 @@ public:
bool Abs = false;
bool Neg = false;
bool Sext = false;
+ bool Lit = false;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
@@ -273,6 +274,10 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ bool isRegOrImmWithIntT16InputMods() const {
+ return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isRegOrImmWithInt32InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -293,6 +298,10 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
+ bool isRegOrImmWithFPT16InputMods() const {
+ return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
bool isRegOrImmWithFP32InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@@ -347,25 +356,20 @@ public:
return isImm() && Imm.Type == ImmT;
}
+ template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
+
bool isImmLiteral() const { return isImmTy(ImmTyNone); }
bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;
}
- bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isDim() const { return isImmTy(ImmTyDim); }
- bool isUNorm() const { return isImmTy(ImmTyUNorm); }
- bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
- bool isA16() const { return isImmTy(ImmTyA16); }
- bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
- bool isExpVM() const { return isImmTy(ImmTyExpVM); }
- bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
@@ -378,7 +382,6 @@ public:
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
- bool isD16() const { return isImmTy(ImmTyD16); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
@@ -395,7 +398,6 @@ public:
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
- bool isHigh() const { return isImmTy(ImmTyHigh); }
bool isRegOrImm() const {
return isReg() || isImm();
@@ -512,7 +514,15 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
+ bool isVCSrcTB16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isVCSrcTB16_Lo128() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
+ }
+
+ bool isVCSrcFake16B16_Lo128() const {
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
}
@@ -532,7 +542,15 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
+ bool isVCSrcTF16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
bool isVCSrcTF16_Lo128() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
+ }
+
+ bool isVCSrcFake16F16_Lo128() const {
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
}
@@ -552,10 +570,16 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::i64);
}
+ bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+
bool isVSrcTB16_Lo128() const {
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
}
+ bool isVSrcFake16B16_Lo128() const {
+ return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
+ }
+
bool isVSrcB16() const {
return isVCSrcB16() || isLiteralImm(MVT::i16);
}
@@ -588,10 +612,16 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::f64);
}
+ bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+
bool isVSrcTF16_Lo128() const {
return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
}
+ bool isVSrcFake16F16_Lo128() const {
+ return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
+ }
+
bool isVSrcF16() const {
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
@@ -879,6 +909,10 @@ public:
bool isWaitVDST() const;
bool isWaitEXP() const;
+ auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { // Wraps P into a callable that evaluates it on this operand.
+ return std::bind(P, *this); // std::bind stores a copy of *this, so the result does not refer back to this object.
+ }
+
StringRef getToken() const {
assert(isToken());
return StringRef(Tok.Data, Tok.Length);
@@ -1344,7 +1378,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1361,7 +1395,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -1381,6 +1415,8 @@ public:
bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
+ bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
+
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
@@ -1424,6 +1460,10 @@ public:
return AMDGPU::isGFX11Plus(getSTI());
}
+ bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
+
+ bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
+
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
bool isGFX10_BEncoding() const {
@@ -1456,10 +1496,16 @@ public:
return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}
- unsigned getNSAMaxSize() const {
- return AMDGPU::getNSAMaxSize(getSTI());
+ unsigned getNSAMaxSize(bool HasSampler = false) const {
+ return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
+ }
+
+ unsigned getMaxNumUserSGPRs() const {
+ return AMDGPU::getMaxNumUserSGPRs(getSTI());
}
+ bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
+
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -1493,10 +1539,9 @@ public:
std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
bool RestoreOnFailure);
- bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
- OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -1531,6 +1576,8 @@ public:
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
ParseStatus parseCPol(OperandVector &Operands);
+ ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
+ ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
SMLoc &StringLoc);
@@ -1540,9 +1587,11 @@ public:
bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
- ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
+ ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
+ bool HasLit = false);
ParseStatus parseReg(OperandVector &Operands);
- ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
+ ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
+ bool HasLit = false);
ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm = true);
ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
@@ -1640,11 +1689,14 @@ private:
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
+ bool validateDS(const MCInst &Inst, const OperandVector &Operands);
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
+ bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
+ const unsigned CPol);
bool validateExeczVcczOperands(const OperandVector &Operands);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
@@ -1733,7 +1785,6 @@ public:
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
- void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
bool parseDimId(unsigned &Encoding);
ParseStatus parseDim(OperandVector &Operands);
@@ -1987,7 +2038,7 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
// GFX90A allows DPP on 64-bit operands.
(isRegClass(AMDGPU::VReg_64RegClassID) &&
- AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+ AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
bool AMDGPUOperand::isT16VRegWithInputMods() const {
@@ -2096,9 +2147,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
"Can't encode literal as exact 64-bit floating-point operand. "
"Low 32-bits will be set to zero");
+ Val &= 0xffffffff00000000u;
}
- Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+ Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
return;
}
@@ -2197,7 +2249,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
}
- Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
+ : Lo_32(Val);
+
+ Inst.addOperand(MCOperand::createImm(Val));
setImmKindLiteral();
return;
@@ -2424,23 +2479,21 @@ bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return false;
}
-bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
+bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) {
- return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+ return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
-OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) {
- bool Result =
- ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
bool PendingErrors = getParser().hasPendingError();
getParser().clearPendingErrors();
if (PendingErrors)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Result)
- return MatchOperand_NoMatch;
- return MatchOperand_Success;
+ return ParseStatus::NoMatch;
+ return ParseStatus::Success;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
@@ -2855,7 +2908,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (isHsaAbi(getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -2864,13 +2917,26 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
}
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
- bool HasSP3AbsModifier) {
+ bool HasSP3AbsModifier, bool HasLit) {
// TODO: add syntactic sugar for 1/(2*PI)
if (isRegister())
return ParseStatus::NoMatch;
assert(!isModifier());
+ if (!HasLit) {
+ HasLit = trySkipId("lit");
+ if (HasLit) {
+ if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
+ return ParseStatus::Failure;
+ ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
+ if (S.isSuccess() &&
+ !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return ParseStatus::Failure;
+ return S;
+ }
+ }
+
const auto& Tok = getToken();
const auto& NextTok = peekToken();
bool IsReal = Tok.is(AsmToken::Real);
@@ -2883,6 +2949,9 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
Negate = true;
}
+ AMDGPUOperand::Modifiers Mods;
+ Mods.Lit = HasLit;
+
if (IsReal) {
// Floating-point expressions are not supported.
// Can only allow floating-point literals with an
@@ -2901,6 +2970,8 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
Operands.push_back(
AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
AMDGPUOperand::ImmTyNone, true));
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ Op.setModifiers(Mods);
return ParseStatus::Success;
@@ -2927,7 +2998,11 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
if (Expr->evaluateAsAbsolute(IntVal)) {
Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+ AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ Op.setModifiers(Mods);
} else {
+ if (HasLit)
+ return ParseStatus::NoMatch;
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
}
@@ -2950,13 +3025,13 @@ ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
}
ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
- bool HasSP3AbsMod) {
+ bool HasSP3AbsMod, bool HasLit) {
ParseStatus Res = parseReg(Operands);
if (!Res.isNoMatch())
return Res;
if (isModifier())
return ParseStatus::NoMatch;
- return parseImm(Operands, HasSP3AbsMod);
+ return parseImm(Operands, HasSP3AbsMod, HasLit);
}
bool
@@ -3052,6 +3127,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm) {
bool Neg, SP3Neg;
bool Abs, SP3Abs;
+ bool Lit;
SMLoc Loc;
// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
@@ -3071,6 +3147,10 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
return ParseStatus::Failure;
+ Lit = trySkipId("lit");
+ if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
+ return ParseStatus::Failure;
+
Loc = getLoc();
SP3Abs = trySkipToken(AsmToken::Pipe);
if (Abs && SP3Abs)
@@ -3078,12 +3158,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
ParseStatus Res;
if (AllowImm) {
- Res = parseRegOrImm(Operands, SP3Abs);
+ Res = parseRegOrImm(Operands, SP3Abs, Lit);
} else {
Res = parseReg(Operands);
}
if (!Res.isSuccess())
- return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
+ return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
+
+ if (Lit && !Operands.back()->isImm())
+ Error(Loc, "expected immediate with lit modifier");
if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
return ParseStatus::Failure;
@@ -3091,12 +3174,15 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
return ParseStatus::Failure;
if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
return ParseStatus::Failure;
+ if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return ParseStatus::Failure;
AMDGPUOperand::Modifiers Mods;
Mods.Abs = Abs || SP3Abs;
Mods.Neg = Neg || SP3Neg;
+ Mods.Lit = Lit;
- if (Mods.hasFPModifiers()) {
+ if (Mods.hasFPModifiers() || Lit) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
if (Op.isExpr())
return Error(Op.getStartLoc(), "expected an absolute expression");
@@ -3522,13 +3608,16 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
return true;
}
+constexpr uint64_t MIMGFlags =
+ SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
+
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
@@ -3574,7 +3663,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
+ if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3582,7 +3671,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
- int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
@@ -3590,7 +3681,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
- bool IsA16 = Inst.getOperand(A16Idx).getImm();
+ bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
if (BaseOpcode->BVH) {
if (IsA16 == BaseOpcode->A16)
return true;
@@ -3609,7 +3700,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
if (IsNSA) {
- if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
+ if (hasPartialNSAEncoding() &&
+ ExpectedAddrSize >
+ getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
int VAddrLastIdx = SrsrcIdx - 1;
unsigned VAddrLastSize =
AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
@@ -3639,7 +3732,7 @@ bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
if (!Desc.mayLoad() || !Desc.mayStore())
return true; // Not atomic
@@ -3677,7 +3770,7 @@ bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3854,7 +3947,7 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
@@ -4106,8 +4199,9 @@ bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
return true;
Error(getSMEMOffsetLoc(Operands),
- (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
- "expected a 21-bit signed offset");
+ isGFX12Plus() ? "expected a 24-bit signed offset"
+ : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
+ : "expected a 21-bit signed offset");
return false;
}
@@ -4189,21 +4283,35 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
const OperandVector &Operands) {
const unsigned Opc = Inst.getOpcode();
int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
- if (DppCtrlIdx < 0)
- return true;
- unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+ if (DppCtrlIdx >= 0) {
+ unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
- if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
- // DPP64 is supported for row_newbcast only.
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx >= 0 &&
- getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
+ if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
+ AMDGPU::isDPALU_DPP(MII.get(Opc))) {
+ // DP ALU DPP is supported for row_newbcast only on GFX9*
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
- Error(S, "64 bit dpp only supports row_newbcast");
+ Error(S, "DP ALU dpp only supports row_newbcast");
return false;
}
}
+ int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
+ bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
+
+ if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx >= 0) {
+ const MCOperand &Src1 = Inst.getOperand(Src1Idx);
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (Src1.isImm() ||
+ (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
+ Error(Op.getStartLoc(), "invalid operand for instruction");
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -4241,7 +4349,19 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
continue;
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
- uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ uint64_t Value = static_cast<uint64_t>(MO.getImm());
+ bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
+ AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
+ bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
+
+ if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
+ Error(getLitLoc(Operands), "invalid operand for instruction");
+ return false;
+ }
+
+ if (IsFP64 && IsValid32Op)
+ Value = Hi_32(Value);
+
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
@@ -4405,6 +4525,29 @@ bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
return false;
}
+bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
+ const OperandVector &Operands) { // DS-flagged instruction check: GWS ones go to validateGWS, others may not set gds when hasGDS() is false.
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::DS) == 0) // Instruction lacks the DS flag: nothing to validate here.
+ return true;
+ if (TSFlags & SIInstrFlags::GWS) // GWS instructions are handled entirely by validateGWS.
+ return validateGWS(Inst, Operands);
+ // Only validate GDS for non-GWS instructions.
+ if (hasGDS()) // hasGDS() is true, so the gds operand needs no further checking.
+ return true;
+ int GDSIdx =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
+ if (GDSIdx < 0) // This opcode has no gds operand at all.
+ return true;
+ unsigned GDS = Inst.getOperand(GDSIdx).getImm();
+ if (GDS) { // A non-zero gds immediate is rejected once hasGDS() has returned false.
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); // Point the diagnostic at the parsed gds operand.
+ Error(S, "gds modifier is not supported on this GPU");
+ return false;
+ }
+ return true;
+}
+
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4443,6 +4586,9 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();
+ if (isGFX12Plus())
+ return validateTHAndScopeBits(Inst, Operands, CPol);
+
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if (TSFlags & SIInstrFlags::SMRD) {
if (CPol && (isSI() || isCI())) {
@@ -4457,11 +4603,17 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
}
if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
- SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
- StringRef CStr(S.getPointer());
- S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
- Error(S, "scc is not supported on this GPU");
- return false;
+ const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
+ SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
+ SIInstrFlags::FLAT;
+ if (!(TSFlags & AllowSCCModifier)) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+ Error(S,
+ "scc modifier is not supported for this instruction on this GPU");
+ return false;
+ }
}
if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
@@ -4488,6 +4640,60 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
+ const OperandVector &Operands,
+ const unsigned CPol) { // Checks the TH and scope fields of CPol against the instruction's flags; false means an error was reported.
+ const unsigned TH = CPol & AMDGPU::CPol::TH; // TH field extracted from the cache-policy immediate.
+ const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; // Scope field extracted from the same immediate.
+
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+
+ auto PrintError = [&](StringRef Msg) { // Reports Msg at the cpol operand location and yields false for direct return.
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ Error(S, Msg);
+ return false;
+ };
+
+ if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
+ (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
+ (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) // Returning FLAT/MUBUF atomics need the TH_ATOMIC_RETURN bit; checked even when TH is 0.
+ return PrintError("instruction must use th:TH_ATOMIC_RETURN");
+
+ if (TH == 0) // An all-zero TH field passes the remaining checks unconditionally.
+ return true;
+
+ if ((TID.TSFlags & SIInstrFlags::SMRD) &&
+ ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
+ (TH == AMDGPU::CPol::TH_NT_HT))) // These three TH values are rejected for SMRD-flagged instructions.
+ return PrintError("invalid th value for SMEM instruction");
+
+ if (TH == AMDGPU::CPol::TH_BYPASS) { // TH_REAL_BYPASS must be set exactly when the scope is SCOPE_SYS.
+ if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
+ CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
+ (Scope == AMDGPU::CPol::SCOPE_SYS &&
+ !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
+ return PrintError("scope and th combination is not valid");
+ }
+
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ if (IsAtomic) { // The atomic flags are tested before mayStore(), so atomics never take the store branch.
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
+ return PrintError("invalid th value for atomic instructions");
+ } else if (IsStore) {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
+ return PrintError("invalid th value for store instructions");
+ } else { // Neither atomic nor store: the load type bit is required.
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
+ return PrintError("invalid th value for load instructions");
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
if (!isGFX11Plus())
return true;
@@ -4613,7 +4819,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid register class: vgpr tuples must be 64 bit aligned");
return false;
}
- if (!validateGWS(Inst, Operands)) {
+ if (!validateDS(Inst, Operands)) {
return false;
}
@@ -4888,7 +5094,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
- if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
+ if (!isHsaAbi(getSTI()))
return TokError("directive only supported for amdhsa OS");
StringRef KernelName;
@@ -4905,6 +5111,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
uint64_t NextFreeVGPR = 0;
uint64_t AccumOffset = 0;
uint64_t SharedVGPRCount = 0;
+ uint64_t PreloadLength = 0;
+ uint64_t PreloadOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
@@ -4973,6 +5181,28 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
Val, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ if (!hasKernargPreload())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+
+ if (Val > getMaxNumUserSGPRs())
+ return OutOfRangeError(ValRange);
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ ValRange);
+ if (Val) {
+ ImpliedUserSGPRCount += Val;
+ PreloadLength = Val;
+ }
+ } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ if (!hasKernargPreload())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+
+ if (Val >= 1024)
+ return OutOfRangeError(ValRange);
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ ValRange);
+ if (Val)
+ PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
@@ -5112,7 +5342,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
@@ -5122,17 +5352,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
if (IVersion.Major < 10)
@@ -5218,6 +5448,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
UserSGPRCount);
+ if (PreloadLength && KD.kernarg_size &&
+ (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
+ return TokError("Kernarg preload length + offset is larger than the "
+ "kernarg segment size");
+
if (isGFX90A()) {
if (!Seen.contains(".amdhsa_accum_offset"))
return TokError(".amdhsa_accum_offset directive is required");
@@ -5419,33 +5654,15 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
- const char *AssemblerDirectiveBegin;
- const char *AssemblerDirectiveEnd;
- std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3AndAbove(&getSTI())
- ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
- HSAMD::V3::AssemblerDirectiveEnd)
- : std::pair(HSAMD::AssemblerDirectiveBegin,
- HSAMD::AssemblerDirectiveEnd);
-
- if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
- return Error(getLoc(),
- (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
- "not available on non-amdhsa OSes")).str());
- }
+ assert(isHsaAbi(getSTI()));
std::string HSAMetadataString;
- if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
- HSAMetadataString))
+ if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
return true;
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
- if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
- return Error(getLoc(), "invalid HSA metadata");
- } else {
- if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
- return Error(getLoc(), "invalid HSA metadata");
- }
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getLoc(), "invalid HSA metadata");
return false;
}
@@ -5588,7 +5805,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3AndAbove(&getSTI())) {
+ if (isHsaAbi(getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
@@ -5611,8 +5828,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
+ return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
+ Twine(" directive is "
+ "not available on non-amdhsa OSes"))
+ .str());
+ }
}
if (IDVal == ".amdgcn_target")
@@ -5946,6 +6167,47 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
}
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ if (isGFX12Plus()) {
+ SMLoc StringLoc = getLoc();
+
+ int64_t CPolVal = 0;
+ ParseStatus ResTH = ParseStatus::NoMatch;
+ ParseStatus ResScope = ParseStatus::NoMatch;
+
+ for (;;) {
+ if (ResTH.isNoMatch()) {
+ int64_t TH;
+ ResTH = parseTH(Operands, TH);
+ if (ResTH.isFailure())
+ return ResTH;
+ if (ResTH.isSuccess()) {
+ CPolVal |= TH;
+ continue;
+ }
+ }
+
+ if (ResScope.isNoMatch()) {
+ int64_t Scope;
+ ResScope = parseScope(Operands, Scope);
+ if (ResScope.isFailure())
+ return ResScope;
+ if (ResScope.isSuccess()) {
+ CPolVal |= Scope;
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ if (ResTH.isNoMatch() && ResScope.isNoMatch())
+ return ParseStatus::NoMatch;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
+ AMDGPUOperand::ImmTyCPol));
+ return ParseStatus::Success;
+ }
+
StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
SMLoc OpLoc = getLoc();
unsigned Enabled = 0, Seen = 0;
@@ -5981,6 +6243,95 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
+ int64_t &Scope) { // Parses a "scope"-prefixed string into a CPol scope value written to Scope; Operands is not used in this body.
+ Scope = AMDGPU::CPol::SCOPE_CU; // default, left in place when the prefix does not parse
+
+ StringRef Value; // passed to parseStringWithPrefix; feeds the name lookup below
+ SMLoc StringLoc; // passed to parseStringWithPrefix; used as the location of the diagnostic below
+ ParseStatus Res;
+
+ Res = parseStringWithPrefix("scope", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res; // any non-success status is handed straight back to the caller
+ // Map the spelled-out scope name to its encoding; 0xffffffff is the "unknown name" sentinel.
+ Scope = StringSwitch<int64_t>(Value)
+ .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
+ .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
+ .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
+ .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
+ .Default(0xffffffff);
+
+ if (Scope == 0xffffffff)
+ return Error(StringLoc, "invalid scope value"); // none of the four names above matched
+
+ return ParseStatus::Success;
+}
+
+ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
+ TH = AMDGPU::CPol::TH_RT; // default
+ // Parses a "th"-prefixed string into a CPol TH encoding written to TH; Operands is not used in this body.
+ StringRef Value;
+ SMLoc StringLoc;
+ ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res; // TH keeps the default assigned above
+
+ if (Value == "TH_DEFAULT")
+ TH = AMDGPU::CPol::TH_RT;
+ else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
+ Value == "TH_LOAD_NT_WB") {
+ return Error(StringLoc, "invalid th value"); // rejected up front: the prefix + suffix lookup below would otherwise accept these three spellings
+ } else if (Value.startswith("TH_ATOMIC_")) {
+ Value = Value.drop_front(10); // length of "TH_ATOMIC_"; Value now holds only the suffix
+ TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
+ } else if (Value.startswith("TH_LOAD_")) {
+ Value = Value.drop_front(8); // length of "TH_LOAD_"
+ TH = AMDGPU::CPol::TH_TYPE_LOAD;
+ } else if (Value.startswith("TH_STORE_")) {
+ Value = Value.drop_front(9); // length of "TH_STORE_"
+ TH = AMDGPU::CPol::TH_TYPE_STORE;
+ } else {
+ return Error(StringLoc, "invalid th value");
+ }
+
+ if (Value == "BYPASS")
+ TH |= AMDGPU::CPol::TH_REAL_BYPASS;
+ // Decode the remaining suffix: atomics use their own table, loads and stores share the other one. Skipped while TH is 0.
+ if (TH != 0) {
+ if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) // same encoding as plain "RETURN"
+ .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
+ .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
+ AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
+ .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
+ AMDGPU::CPol::TH_ATOMIC_NT)
+ .Default(0xffffffff); // unknown atomic suffix
+ else
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("NT", AMDGPU::CPol::TH_NT)
+ .Case("HT", AMDGPU::CPol::TH_HT)
+ .Case("LU", AMDGPU::CPol::TH_LU)
+ .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
+ .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
+ .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
+ .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
+ .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
+ .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+ .Default(0xffffffff); // unknown load/store suffix
+ }
+ // NOTE(review): the sentinel is OR-ed into TH, so this test assumes every bit already set in TH lies in the low 32 bits - confirm against the CPol enum.
+ if (TH == 0xffffffff)
+ return Error(StringLoc, "invalid th value");
+
+ return ParseStatus::Success;
+}
+
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@@ -7578,66 +7929,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
//===----------------------------------------------------------------------===//
-// SMEM
-//===----------------------------------------------------------------------===//
-
-void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
- bool IsAtomicReturn = false;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
- if (!Op.isCPol())
- continue;
- IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
- break;
- }
-
- if (!IsAtomicReturn) {
- int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
- if (NewOpc != -1)
- Inst.setOpcode(NewOpc);
- }
-
- IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
- SIInstrFlags::IsAtomicRet;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- if (IsAtomicReturn && i == 1)
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle the case where soffset is an immediate
- if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
- Op.addImmOperands(Inst, 1);
- continue;
- }
-
- // Handle tokens like 'offen' which are sometimes hard-coded into the
- // asm string. There are no MCInst operands for these.
- if (Op.isToken()) {
- continue;
- }
- assert(Op.isImm());
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- if ((int)Inst.getNumOperands() <=
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
- addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTySMEMOffsetMod);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
-}
-
-//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
@@ -7704,7 +7995,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
// TODO: Should try to check code object version from directive???
AMDGPU::getAmdhsaCodeObjectVersion());
- if (isHsaAbiVersion3AndAbove(&getSTI()))
+ if (isHsaAbi(getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}