Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1072
1 file changed, 833 insertions(+), 239 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index af4a47935e3f..00032c7d4ea5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -113,9 +114,7 @@ public:
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
- ImmTyDLC,
- ImmTyGLC,
- ImmTySLC,
+ ImmTyCPol,
ImmTySWZ,
ImmTyTFE,
ImmTyD16,
@@ -299,6 +298,8 @@ public:
return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
}
+ bool isVRegWithInputMods() const;
+
bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
bool isSDWAFP32Operand() const;
@@ -336,12 +337,7 @@ public:
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
- bool isDLC() const { return isImmTy(ImmTyDLC); }
- bool isGLC() const { return isImmTy(ImmTyGLC); }
- // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
- // value of the GLC operand.
- bool isGLC_1() const { return isImmTy(ImmTyGLC); }
- bool isSLC() const { return isImmTy(ImmTySLC); }
+ bool isCPol() const { return isImmTy(ImmTyCPol); }
bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
@@ -449,6 +445,26 @@ public:
return isSSrcF16();
}
+ bool isSSrcV2FP32() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcF32();
+ }
+
+ bool isSCSrcV2FP32() const {
+ llvm_unreachable("cannot happen");
+ return isSCSrcF32();
+ }
+
+ bool isSSrcV2INT32() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcB32();
+ }
+
+ bool isSCSrcV2INT32() const {
+ llvm_unreachable("cannot happen");
+ return isSCSrcB32();
+ }
+
bool isSSrcOrLdsB32() const {
return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
isLiteralImm(MVT::i32) || isExpr();
@@ -502,6 +518,22 @@ public:
return isVSrcB16() || isLiteralImm(MVT::v2i16);
}
+ bool isVCSrcV2FP32() const {
+ return isVCSrcF64();
+ }
+
+ bool isVSrcV2FP32() const {
+ return isVSrcF64() || isLiteralImm(MVT::v2f32);
+ }
+
+ bool isVCSrcV2INT32() const {
+ return isVCSrcB64();
+ }
+
+ bool isVSrcV2INT32() const {
+ return isVSrcB64() || isLiteralImm(MVT::v2i32);
+ }
+
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
}
@@ -542,6 +574,102 @@ public:
return isVISrcF16() || isVISrcB32();
}
+ bool isVISrc_64B64() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
+ }
+
+ bool isVISrc_64F64() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
+ }
+
+ bool isVISrc_64V2FP32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
+ }
+
+ bool isVISrc_64V2INT32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_256B64() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
+ }
+
+ bool isVISrc_256F64() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
+ }
+
+ bool isVISrc_128B16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
+ }
+
+ bool isVISrc_128V2B16() const {
+ return isVISrc_128B16();
+ }
+
+ bool isVISrc_128B32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_128F32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
+ }
+
+ bool isVISrc_256V2FP32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
+ }
+
+ bool isVISrc_256V2INT32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_512B32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_512B16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
+ }
+
+ bool isVISrc_512V2B16() const {
+ return isVISrc_512B16();
+ }
+
+ bool isVISrc_512F32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
+ }
+
+ bool isVISrc_512F16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
+ }
+
+ bool isVISrc_512V2F16() const {
+ return isVISrc_512F16() || isVISrc_512B32();
+ }
+
+ bool isVISrc_1024B32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
+ }
+
+ bool isVISrc_1024B16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
+ }
+
+ bool isVISrc_1024V2B16() const {
+ return isVISrc_1024B16();
+ }
+
+ bool isVISrc_1024F32() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
+ }
+
+ bool isVISrc_1024F16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
+ }
+
+ bool isVISrc_1024V2F16() const {
+ return isVISrc_1024F16() || isVISrc_1024B32();
+ }
+
bool isAISrcB32() const {
return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
}
@@ -566,6 +694,14 @@ public:
return isAISrcF16() || isAISrcB32();
}
+ bool isAISrc_64B64() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
+ }
+
+ bool isAISrc_64F64() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
+ }
+
bool isAISrc_128B32() const {
return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
}
@@ -590,6 +726,22 @@ public:
return isAISrc_128F16() || isAISrc_128B32();
}
+ bool isVISrc_128F16() const {
+ return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
+ }
+
+ bool isVISrc_128V2F16() const {
+ return isVISrc_128F16() || isVISrc_128B32();
+ }
+
+ bool isAISrc_256B64() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
+ }
+
+ bool isAISrc_256F64() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
+ }
+
bool isAISrc_512B32() const {
return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
}
@@ -837,9 +989,7 @@ public:
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
- case ImmTyDLC: OS << "DLC"; break;
- case ImmTyGLC: OS << "GLC"; break;
- case ImmTySLC: OS << "SLC"; break;
+ case ImmTyCPol: OS << "CPol"; break;
case ImmTySWZ: OS << "SWZ"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
@@ -1021,6 +1171,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
+ unsigned CPolSeen;
/// @name Auto-generated Match Functions
/// {
@@ -1061,7 +1212,8 @@ private:
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
- bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+ // TODO: Possibly make subtargetHasRegister const.
+ bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
@@ -1105,7 +1257,7 @@ private:
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
- bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
+ bool IsAtomic, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
bool IsGdsHardcoded);
@@ -1140,7 +1292,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1157,7 +1309,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -1165,10 +1317,6 @@ public:
}
}
- bool hasXNACK() const {
- return AMDGPU::hasXNACK(getSTI());
- }
-
bool hasMIMG_R128() const {
return AMDGPU::hasMIMG_R128(getSTI());
}
@@ -1181,6 +1329,8 @@ public:
return AMDGPU::hasGFX10A16(getSTI());
}
+ bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
+
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
@@ -1197,6 +1347,10 @@ public:
return AMDGPU::isGFX9(getSTI());
}
+ bool isGFX90A() const {
+ return AMDGPU::isGFX90A(getSTI());
+ }
+
bool isGFX9Plus() const {
return AMDGPU::isGFX9Plus(getSTI());
}
@@ -1219,6 +1373,10 @@ public:
return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
}
+ bool hasArchitectedFlatScratch() const {
+ return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+ }
+
bool hasSGPR102_SGPR103() const {
return !isVI() && !isGFX9();
}
@@ -1294,8 +1452,9 @@ public:
bool (*ConvertResult)(int64_t&) = nullptr);
OperandMatchResultTy
- parseNamedBit(const char *Name, OperandVector &Operands,
+ parseNamedBit(StringRef Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseCPol(OperandVector &Operands);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value,
SMLoc &StringLoc);
@@ -1379,14 +1538,19 @@ private:
bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst);
- bool validateLdsDirect(const MCInst &Inst);
+ bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
+ bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+ bool validateAGPRLdSt(const MCInst &Inst) const;
+ bool validateVGPRAlign(const MCInst &Inst) const;
+ bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
+ Optional<StringRef> validateLdsDirect(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1403,6 +1567,7 @@ private:
bool isId(const AsmToken &Token, const StringRef Id) const;
bool isToken(const AsmToken::TokenKind Kind) const;
bool trySkipId(const StringRef Id);
+ bool trySkipId(const StringRef Pref, const StringRef Id);
bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
@@ -1420,6 +1585,8 @@ private:
void lex();
public:
+ void onBeginOfFile() override;
+
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
@@ -1451,16 +1618,12 @@ public:
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
- void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
- void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
- void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
- void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
+ void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
+ void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
- AMDGPUOperand::Ptr defaultDLC() const;
- AMDGPUOperand::Ptr defaultGLC() const;
- AMDGPUOperand::Ptr defaultGLC_1() const;
- AMDGPUOperand::Ptr defaultSLC() const;
+ AMDGPUOperand::Ptr defaultCPol() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -1474,6 +1637,8 @@ public:
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
@@ -1482,6 +1647,9 @@ public:
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
+ void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
+
+ bool parseDimId(unsigned &Encoding);
OperandMatchResultTy parseDim(OperandVector &Operands);
OperandMatchResultTy parseDPP8(OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
@@ -1551,11 +1719,16 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
@@ -1715,7 +1888,8 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
// literal goes into the lower half and the upper half is zero. We also
// require that the literal may be losslessly converted to f16.
MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
- (type == MVT::v2i16)? MVT::i16 : type;
+ (type == MVT::v2i16)? MVT::i16 :
+ (type == MVT::v2f32)? MVT::f32 : type;
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
@@ -1725,6 +1899,13 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
+bool AMDGPUOperand::isVRegWithInputMods() const {
+ return isRegClass(AMDGPU::VGPR_32RegClassID) ||
+ // GFX90A allows DPP on 64-bit operands.
+ (isRegClass(AMDGPU::VReg_64RegClassID) &&
+ AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+}
+
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
@@ -1751,8 +1932,9 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
}
bool AMDGPUOperand::isBoolReg() const {
- return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
- (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
+ auto FB = AsmParser->getFeatureBits();
+ return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
+ (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -1806,6 +1988,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1849,7 +2032,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16: {
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -1881,6 +2068,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1897,6 +2088,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
@@ -2000,6 +2192,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 4: return AMDGPU::VReg_128RegClassID;
case 5: return AMDGPU::VReg_160RegClassID;
case 6: return AMDGPU::VReg_192RegClassID;
+ case 7: return AMDGPU::VReg_224RegClassID;
case 8: return AMDGPU::VReg_256RegClassID;
case 16: return AMDGPU::VReg_512RegClassID;
case 32: return AMDGPU::VReg_1024RegClassID;
@@ -2022,6 +2215,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 4: return AMDGPU::SGPR_128RegClassID;
case 5: return AMDGPU::SGPR_160RegClassID;
case 6: return AMDGPU::SGPR_192RegClassID;
+ case 7: return AMDGPU::SGPR_224RegClassID;
case 8: return AMDGPU::SGPR_256RegClassID;
case 16: return AMDGPU::SGPR_512RegClassID;
}
@@ -2034,6 +2228,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 4: return AMDGPU::AReg_128RegClassID;
case 5: return AMDGPU::AReg_160RegClassID;
case 6: return AMDGPU::AReg_192RegClassID;
+ case 7: return AMDGPU::AReg_224RegClassID;
case 8: return AMDGPU::AReg_256RegClassID;
case 16: return AMDGPU::AReg_512RegClassID;
case 32: return AMDGPU::AReg_1024RegClassID;
@@ -2529,7 +2724,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3(&getSTI())) {
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -3200,7 +3395,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
return true;
unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
- unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
+ unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
if (DMask == 0)
DMask = 1;
@@ -3230,6 +3425,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+ int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
assert(VAddr0Idx != -1);
assert(SrsrcIdx != -1);
@@ -3241,22 +3437,26 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
unsigned Dim = Inst.getOperand(DimIdx).getImm();
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
- unsigned VAddrSize =
+ unsigned ActualAddrSize =
IsNSA ? SrsrcIdx - VAddr0Idx
: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+ bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
+
+ unsigned ExpectedAddrSize =
+ AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
- unsigned AddrSize = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
if (!IsNSA) {
- if (AddrSize > 8)
- AddrSize = 16;
- else if (AddrSize > 4)
- AddrSize = 8;
+ if (ExpectedAddrSize > 8)
+ ExpectedAddrSize = 16;
+
+ // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
+ // This provides backward compatibility for assembly created
+ // before 160b/192b/224b types were directly supported.
+ if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
+ return true;
}
- return VAddrSize == AddrSize;
+ return ActualAddrSize == ExpectedAddrSize;
}
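Annotation (editor's sketch, not part of the commit): for non-NSA encodings the expected vaddr count still rounds up past 8 to 16, and an 8-VGPR vaddr is now tolerated when only 5-7 VGPRs are required, so assembly written before the 160b/192b/224b types keeps assembling. A minimal standalone restatement with plain VGPR counts in place of MCInst operands:

// Non-NSA vaddr size rule from validateMIMGAddrSize above.
bool addrSizeOk(unsigned Actual, unsigned Expected, bool IsNSA) {
  if (!IsNSA) {
    if (Expected > 8)
      Expected = 16;                       // non-NSA rounds up to 16
    if (Actual == 8 && Expected >= 5 && Expected <= 7)
      return true;                         // padded-to-8 compatibility case
  }
  return Actual == Expected;
}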
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
@@ -3298,6 +3498,29 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
+bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ return true;
+
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
+ if (!BaseOpcode->MSAA)
+ return true;
+
+ int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+ assert(DimIdx != -1);
+
+ unsigned Dim = Inst.getOperand(DimIdx).getImm();
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+
+ return DimInfo->MSAA;
+}
+
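Annotation (sketch, not part of the commit): validateMIMGMSAA only constrains opcodes whose base opcode carries the MSAA flag; assuming the flag lookups behave as the code above suggests, the decision reduces to:

// Non-MSAA opcodes always pass; MSAA opcodes need an MSAA dim
// (e.g. 2D_MSAA or 2D_MSAA_ARRAY).
bool msaaDimOk(bool OpcodeIsMSAA, bool DimIsMSAA) {
  return !OpcodeIsMSAA || DimIsMSAA;
}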
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
{
switch (Opcode) {
@@ -3559,7 +3782,7 @@ static bool IsRevOpcode(const unsigned Opcode)
}
}
-bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
using namespace SIInstrFlags;
const unsigned Opcode = Inst.getOpcode();
@@ -3567,33 +3790,29 @@ bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
// lds_direct register is defined so that it can be used
// with 9-bit operands only. Ignore encodings which do not accept these.
- if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
- return true;
+ const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
+ if ((Desc.TSFlags & Enc) == 0)
+ return None;
- const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
- const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
- const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+ for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
+ auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
+ if (SrcIdx == -1)
+ break;
+ const auto &Src = Inst.getOperand(SrcIdx);
+ if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
- const int SrcIndices[] = { Src1Idx, Src2Idx };
+ if (isGFX90A())
+ return StringRef("lds_direct is not supported on this GPU");
- // lds_direct cannot be specified as either src1 or src2.
- for (int SrcIdx : SrcIndices) {
- if (SrcIdx == -1) break;
- const MCOperand &Src = Inst.getOperand(SrcIdx);
- if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
- return false;
+ if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
+ return StringRef("lds_direct cannot be used with this instruction");
+
+ if (SrcName != OpName::src0)
+ return StringRef("lds_direct may be used as src0 only");
}
}
- if (Src0Idx == -1)
- return true;
-
- const MCOperand &Src = Inst.getOperand(Src0Idx);
- if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
- return true;
-
- // lds_direct is specified as src0. Check additional limitations.
- return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
+ return None;
}
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
@@ -3624,7 +3843,7 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
// For FLAT segment the offset must be positive;
// MSB is ignored and forced to zero.
- if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
+ if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
if (!isIntN(OffsetSize, Op.getImm())) {
Error(getFlatOffsetLoc(Operands),
@@ -3733,6 +3952,28 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
return true;
}
+bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
+ const OperandVector &Operands) {
+ const unsigned Opc = Inst.getOpcode();
+ int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
+ if (DppCtrlIdx < 0)
+ return true;
+ unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+
+ if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
+ // DPP64 is supported for row_newbcast only.
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ if (Src0Idx >= 0 &&
+ getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
+ Error(S, "64 bit dpp only supports row_newbcast");
+ return false;
+ }
+ }
+
+ return true;
+}
+
// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
auto FB = getFeatureBits();
@@ -3802,18 +4043,148 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
return true;
}
+// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
+static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
+ const MCRegisterInfo *MRI) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
+ if (OpIdx < 0)
+ return -1;
+
+ const MCOperand &Op = Inst.getOperand(OpIdx);
+ if (!Op.isReg())
+ return -1;
+
+ unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+ auto Reg = Sub ? Sub : Op.getReg();
+ const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+ return AGPR32.contains(Reg) ? 1 : 0;
+}
+
+bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
+ SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
+ SIInstrFlags::DS)) == 0)
+ return true;
+
+ uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
+ : AMDGPU::OpName::vdata;
+
+ const MCRegisterInfo *MRI = getMRI();
+ int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
+ int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
+
+ if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
+ int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
+ if (Data2Areg >= 0 && Data2Areg != DataAreg)
+ return false;
+ }
+
+ auto FB = getFeatureBits();
+ if (FB[AMDGPU::FeatureGFX90AInsts]) {
+ if (DataAreg < 0 || DstAreg < 0)
+ return true;
+ return DstAreg == DataAreg;
+ }
+
+ return DstAreg < 1 && DataAreg < 1;
+}
+
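Annotation (sketch, not part of the commit): the dst/data rule enforced by validateAGPRLdSt reads as a small truth table. A standalone version using the same convention as IsAGPROperand (-1 = no register operand, 0 = VGPR, 1 = AGPR):

#include <cassert>

bool agprLdStOk(bool HasGFX90AInsts, int DstAreg, int DataAreg) {
  assert(DstAreg >= -1 && DstAreg <= 1 && DataAreg >= -1 && DataAreg <= 1);
  if (HasGFX90AInsts) {
    if (DstAreg < 0 || DataAreg < 0)
      return true;               // one side absent: nothing to compare
    return DstAreg == DataAreg;  // both VGPR or both AGPR
  }
  // Pre-gfx90a: AGPR loads and stores are not supported at all.
  return DstAreg < 1 && DataAreg < 1;
}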
+bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
+ auto FB = getFeatureBits();
+ if (!FB[AMDGPU::FeatureGFX90AInsts])
+ return true;
+
+ const MCRegisterInfo *MRI = getMRI();
+ const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+ const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ const MCOperand &Op = Inst.getOperand(I);
+ if (!Op.isReg())
+ continue;
+
+ unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+ if (!Sub)
+ continue;
+
+ if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
+ return false;
+ if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
+ return false;
+ }
+
+ return true;
+}
+
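Annotation (sketch, not part of the commit): validateVGPRAlign boils down to an even-start check on any register tuple; with raw indices in place of MCRegister queries:

// gfx90a requires VGPR/AGPR tuples to be 64-bit aligned, i.e. to start
// at an even register index: v[2:3] is fine, v[3:4] is rejected.
bool tupleAligned(unsigned FirstRegIdx, unsigned NumRegs) {
  return NumRegs < 2 || (FirstRegIdx & 1) == 0;
}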
+// gfx90a has an undocumented limitation:
+// DS_GWS opcodes must use even aligned registers.
+bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
+ const OperandVector &Operands) {
+ if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
+ return true;
+
+ int Opc = Inst.getOpcode();
+ if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
+ Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
+ return true;
+
+ const MCRegisterInfo *MRI = getMRI();
+ const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+ int Data0Pos =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
+ assert(Data0Pos != -1);
+ auto Reg = Inst.getOperand(Data0Pos).getReg();
+ auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
+ if (RegIdx & 1) {
+ SMLoc RegLoc = getRegLoc(Reg, Operands);
+ Error(RegLoc, "vgpr must be even aligned");
+ return false;
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
const OperandVector &Operands,
const SMLoc &IDLoc) {
- int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
- AMDGPU::OpName::glc1);
- if (GLCPos != -1) {
- // -1 is set by GLC_1 default operand. In all cases "glc" must be present
- // in the asm string, and the default value means it is not present.
- if (Inst.getOperand(GLCPos).getImm() == -1) {
+ int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::cpol);
+ if (CPolPos == -1)
+ return true;
+
+ unsigned CPol = Inst.getOperand(CPolPos).getImm();
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & (SIInstrFlags::SMRD)) &&
+ (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
+ Error(IDLoc, "invalid cache policy for SMRD instruction");
+ return false;
+ }
+
+ if (isGFX90A() && (CPol & CPol::SCC)) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+ Error(S, "scc is not supported on this GPU");
+ return false;
+ }
+
+ if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
+ return true;
+
+ if (TSFlags & SIInstrFlags::IsAtomicRet) {
+ if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
Error(IDLoc, "instruction must use glc");
return false;
}
+ } else {
+ if (CPol & CPol::GLC) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ StringRef CStr(S.getPointer());
+ S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
+ Error(S, "instruction must not use glc");
+ return false;
+ }
}
return true;
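Annotation (sketch, not part of the commit): the rewritten validateCoherencyBits layers several rules on the single cpol immediate. A standalone restatement of the decision order, with assumed bit values standing in for AMDGPU::CPol (the real encodings live in SIDefines.h):

// Returns nullptr when the policy is valid, else the diagnostic text.
const char *checkCPol(unsigned CPol, bool IsSMRD, bool IsGFX90A,
                      bool IsAtomicRet, bool IsAtomicNoRet, bool IsMIMG) {
  const unsigned GLC = 1, DLC = 4, SCC = 16;  // assumed CPol bit values
  if (IsSMRD && (CPol & ~(GLC | DLC)))
    return "invalid cache policy for SMRD instruction";
  if (IsGFX90A && (CPol & SCC))
    return "scc is not supported on this GPU";
  if (!IsAtomicRet && !IsAtomicNoRet)
    return nullptr;                        // not an atomic: done
  if (IsAtomicRet && !IsMIMG && !(CPol & GLC))
    return "instruction must use glc";     // returning atomics need glc
  if (!IsAtomicRet && (CPol & GLC))
    return "instruction must not use glc"; // no-return atomics forbid glc
  return nullptr;
}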
@@ -3822,9 +4193,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc,
const OperandVector &Operands) {
- if (!validateLdsDirect(Inst)) {
- Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
- "invalid use of lds_direct");
+ if (auto ErrMsg = validateLdsDirect(Inst)) {
+ Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
return false;
}
if (!validateSOPLiteral(Inst)) {
@@ -3851,6 +4221,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid op_sel operand");
return false;
}
+ if (!validateDPP(Inst, Operands)) {
+ return false;
+ }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
@@ -3861,6 +4234,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
Error(IDLoc, "dim modifier is required on this GPU");
return false;
}
+ if (!validateMIMGMSAA(Inst)) {
+ Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
+ "invalid dim; must be MSAA type");
+ return false;
+ }
if (!validateMIMGDataSize(Inst)) {
Error(IDLoc,
"image data size does not match dmask and tfe");
@@ -3893,6 +4271,26 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMAIAccWrite(Inst, Operands)) {
return false;
}
+ if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
+ return false;
+ }
+
+ if (!validateAGPRLdSt(Inst)) {
+ Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
+ ? "invalid register class: data and dst should be all VGPR or AGPR"
+ : "invalid register class: agpr loads and stores not supported on this GPU"
+ );
+ return false;
+ }
+ if (!validateVGPRAlign(Inst)) {
+ Error(IDLoc,
+ "invalid register class: vgpr tuples must be 64 bit aligned");
+ return false;
+ }
+ if (!validateGWS(Inst, Operands)) {
+ return false;
+ }
+
if (!validateDivScale(Inst)) {
Error(IDLoc, "ABS not allowed in VOP3B instructions");
return false;
@@ -4062,21 +4460,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
- std::string Target;
-
- SMLoc TargetStart = getLoc();
- if (getParser().parseEscapedString(Target))
+ std::string TargetIDDirective;
+ SMLoc TargetStart = getTok().getLoc();
+ if (getParser().parseEscapedString(TargetIDDirective))
return true;
- SMRange TargetRange = SMRange(TargetStart, getLoc());
- std::string ExpectedTarget;
- raw_string_ostream ExpectedTargetOS(ExpectedTarget);
- IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+ SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+ if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+ return getParser().Error(TargetRange.Start,
+ (Twine(".amdgcn_target directive's target id ") +
+ Twine(TargetIDDirective) +
+ Twine(" does not match the specified target id ") +
+ Twine(getTargetStreamer().getTargetID()->toString())).str());
- if (Target != ExpectedTargetOS.str())
- return Error(TargetRange.Start, "target must match options", TargetRange);
-
- getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
return false;
}
@@ -4143,12 +4539,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
+ uint64_t AccumOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
unsigned UserSGPRCount = 0;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
- bool ReserveXNACK = hasXNACK();
Optional<bool> EnableWavefrontSize32;
while (true) {
@@ -4191,7 +4587,15 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.private_segment_fixed_size = Val;
+ } else if (ID == ".amdhsa_kernarg_size") {
+ if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ return OutOfRangeError(ValRange);
+ KD.kernarg_size = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ if (hasArchitectedFlatScratch())
+ return Error(IDRange.Start,
+ "directive is not supported with architected flat scratch",
+ IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
@@ -4222,6 +4626,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (Val)
UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+ if (hasArchitectedFlatScratch())
+ return Error(IDRange.Start,
+ "directive is not supported with architected flat scratch",
+ IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
@@ -4241,10 +4649,20 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
- PARSE_BITS_ENTRY(
- KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
- ValRange);
+ if (hasArchitectedFlatScratch())
+ return Error(IDRange.Start,
+ "directive is not supported with architected flat scratch",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ } else if (ID == ".amdhsa_enable_private_segment") {
+ if (!hasArchitectedFlatScratch())
+ return Error(
+ IDRange.Start,
+ "directive is not supported without architected flat scratch",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
@@ -4271,6 +4689,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_next_free_sgpr") {
SGPRRange = ValRange;
NextFreeSGPR = Val;
+ } else if (ID == ".amdhsa_accum_offset") {
+ if (!isGFX90A())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
@@ -4278,6 +4700,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_reserve_flat_scratch") {
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
+ if (hasArchitectedFlatScratch())
+ return Error(IDRange.Start,
+ "directive is not supported with architected flat scratch",
+ IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveFlatScr = Val;
@@ -4286,7 +4712,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx8+", IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
- ReserveXNACK = Val;
+ if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
+ return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
+ IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
@@ -4311,6 +4739,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
ValRange);
+ } else if (ID == ".amdhsa_tg_split") {
+ if (!isGFX90A())
+ return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
+ ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4372,7 +4805,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
unsigned VGPRBlocks;
unsigned SGPRBlocks;
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
- ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+ getTargetStreamer().getTargetID()->isXnackOnOrAny(),
+ EnableWavefrontSize32, NextFreeVGPR,
VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
SGPRBlocks))
return true;
@@ -4395,9 +4829,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
UserSGPRCount);
+ if (isGFX90A()) {
+ if (Seen.find(".amdhsa_accum_offset") == Seen.end())
+ return TokError(".amdhsa_accum_offset directive is required");
+ if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
+ return TokError("accum_offset should be in range [4..256] in "
+ "increments of 4");
+ if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
+ return TokError("accum_offset exceeds total VGPR allocation");
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
+ (AccumOffset / 4 - 1));
+ }
+
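Annotation (sketch, not part of the commit): the .amdhsa_accum_offset checks and encoding above fit in a few lines; a self-contained version with alignTo replicated inline:

#include <algorithm>
#include <cstdint>

// Validates the directive value and produces the COMPUTE_PGM_RSRC3
// field, which stores (offset/4 - 1).
bool encodeAccumOffset(uint64_t Off, uint64_t NextFreeVGPR, unsigned &Enc) {
  if (Off < 4 || Off > 256 || (Off & 3))
    return false;                            // range [4..256], step 4
  uint64_t Vgprs = std::max<uint64_t>(1, NextFreeVGPR);
  if (Off > (Vgprs + 3) / 4 * 4)             // alignTo(Vgprs, 4)
    return false;                            // exceeds VGPR allocation
  Enc = unsigned(Off / 4 - 1);
  return true;
}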
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
- ReserveFlatScr, ReserveXNACK);
+ ReserveFlatScr);
return false;
}
@@ -4423,9 +4869,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// targeted GPU.
if (isToken(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
- ISA.Stepping,
- "AMD", "AMDGPU");
+ getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
+ ISA.Stepping,
+ "AMD", "AMDGPU");
return false;
}
@@ -4450,8 +4896,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
if (!parseString(ArchName, "invalid arch name"))
return true;
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
- VendorName, ArchName);
+ getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
+ VendorName, ArchName);
return false;
}
@@ -4560,19 +5006,11 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
"architectures");
}
- auto ISAVersionStringFromASM = getToken().getStringContents();
+ auto TargetIDDirective = getLexer().getTok().getStringContents();
+ if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+ return Error(getParser().getTok().getLoc(), "target id must match options");
- std::string ISAVersionStringFromSTI;
- raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
- IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
-
- if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
- return Error(getLoc(),
- ".amd_amdgpu_isa directive does not match triple and/or mcpu "
- "arguments specified through the command line");
- }
-
- getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
+ getTargetStreamer().EmitISAVersion();
Lex();
return false;
@@ -4582,7 +5020,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3(&getSTI())
+ isHsaAbiVersion3Or4(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
@@ -4599,7 +5037,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
HSAMetadataString))
return true;
- if (isHsaAbiVersion3(&getSTI())) {
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
@@ -4749,12 +5187,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3(&getSTI())) {
- if (IDVal == ".amdgcn_target")
- return ParseDirectiveAMDGCNTarget();
-
+ if (isHsaAbiVersion3Or4(&getSTI())) {
if (IDVal == ".amdhsa_kernel")
- return ParseDirectiveAMDHSAKernel();
+ return ParseDirectiveAMDHSAKernel();
// TODO: Restructure/combine with PAL metadata directive.
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
@@ -4779,6 +5214,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveHSAMetadata();
}
+ if (IDVal == ".amdgcn_target")
+ return ParseDirectiveAMDGCNTarget();
+
if (IDVal == ".amdgpu_lds")
return ParseDirectiveAMDGPULDS();
@@ -4792,7 +5230,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
- unsigned RegNo) const {
+ unsigned RegNo) {
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
@@ -4824,7 +5262,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
- return (isVI() || isGFX9()) && hasXNACK();
+ return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
case AMDGPU::SGPR_NULL:
return isGFX10Plus();
default:
@@ -4881,16 +5319,21 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
unsigned Prefix = Operands.size();
for (;;) {
+ auto Loc = getLoc();
ResTy = parseReg(Operands);
+ if (ResTy == MatchOperand_NoMatch)
+ Error(Loc, "expected a register");
if (ResTy != MatchOperand_Success)
- return ResTy;
+ return MatchOperand_ParseFail;
RBraceLoc = getLoc();
if (trySkipToken(AsmToken::RBrac))
break;
- if (!trySkipToken(AsmToken::Comma))
+ if (!skipToken(AsmToken::Comma,
+ "expected a comma or a closing square bracket")) {
return MatchOperand_ParseFail;
+ }
}
if (Operands.size() - Prefix > 1) {
@@ -4940,11 +5383,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
OperandMode Mode = OperandMode_Default;
if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
Mode = OperandMode_NSA;
+ CPolSeen = 0;
OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
- // Eat the comma or space if there is one.
- trySkipToken(AsmToken::Comma);
-
if (Res != MatchOperand_Success) {
checkUnsupportedInstruction(Name, NameLoc);
if (!Parser.hasPendingError()) {
@@ -4959,6 +5400,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
}
return true;
}
+
+ // Eat the comma or space if there is one.
+ trySkipToken(AsmToken::Comma);
}
return false;
@@ -5043,39 +5487,27 @@ AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
}
OperandMatchResultTy
-AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy) {
- int64_t Bit = 0;
+ int64_t Bit;
SMLoc S = getLoc();
- // We are at the end of the statement, and this is a default argument, so
- // use a default value.
- if (!isToken(AsmToken::EndOfStatement)) {
- switch(getTokenKind()) {
- case AsmToken::Identifier: {
- StringRef Tok = getTokenStr();
- if (Tok == Name) {
- if (Tok == "r128" && !hasMIMG_R128())
- Error(S, "r128 modifier is not supported on this GPU");
- if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
- Error(S, "a16 modifier is not supported on this GPU");
- Bit = 1;
- Parser.Lex();
- } else if (Tok.startswith("no") && Tok.endswith(Name)) {
- Bit = 0;
- Parser.Lex();
- } else {
- return MatchOperand_NoMatch;
- }
- break;
- }
- default:
- return MatchOperand_NoMatch;
- }
+ if (trySkipId(Name)) {
+ Bit = 1;
+ } else if (trySkipId("no", Name)) {
+ Bit = 0;
+ } else {
+ return MatchOperand_NoMatch;
}
- if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
+ if (Name == "r128" && !hasMIMG_R128()) {
+ Error(S, "r128 modifier is not supported on this GPU");
+ return MatchOperand_ParseFail;
+ }
+ if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
+ Error(S, "a16 modifier is not supported on this GPU");
return MatchOperand_ParseFail;
+ }
if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
ImmTy = AMDGPUOperand::ImmTyR128A16;
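Annotation (sketch, not part of the commit): the simplified parseNamedBit reduces to a tri-state token match followed by the GPU-support checks, which now fail the parse instead of silently accepting the modifier:

#include <string>

// 1 = "name" seen, 0 = "noname" seen, -1 = no match (leave token alone).
int matchNamedBit(const std::string &Tok, const std::string &Name) {
  if (Tok == Name)
    return 1;
  if (Tok == "no" + Name)
    return 0;
  return -1;
}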
@@ -5084,6 +5516,62 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
return MatchOperand_Success;
}
+OperandMatchResultTy
+AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ unsigned CPolOn = 0;
+ unsigned CPolOff = 0;
+ SMLoc S = getLoc();
+
+ if (trySkipId("glc"))
+ CPolOn = AMDGPU::CPol::GLC;
+ else if (trySkipId("noglc"))
+ CPolOff = AMDGPU::CPol::GLC;
+ else if (trySkipId("slc"))
+ CPolOn = AMDGPU::CPol::SLC;
+ else if (trySkipId("noslc"))
+ CPolOff = AMDGPU::CPol::SLC;
+ else if (trySkipId("dlc"))
+ CPolOn = AMDGPU::CPol::DLC;
+ else if (trySkipId("nodlc"))
+ CPolOff = AMDGPU::CPol::DLC;
+ else if (trySkipId("scc"))
+ CPolOn = AMDGPU::CPol::SCC;
+ else if (trySkipId("noscc"))
+ CPolOff = AMDGPU::CPol::SCC;
+ else
+ return MatchOperand_NoMatch;
+
+ if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
+ Error(S, "dlc modifier is not supported on this GPU");
+ return MatchOperand_ParseFail;
+ }
+
+ if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
+ Error(S, "scc modifier is not supported on this GPU");
+ return MatchOperand_ParseFail;
+ }
+
+ if (CPolSeen & (CPolOn | CPolOff)) {
+ Error(S, "duplicate cache policy modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ CPolSeen |= (CPolOn | CPolOff);
+
+ for (unsigned I = 1; I != Operands.size(); ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (Op.isCPol()) {
+ Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
+ return MatchOperand_Success;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
+ AMDGPUOperand::ImmTyCPol));
+
+ return MatchOperand_Success;
+}
+
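Annotation (sketch, not part of the commit): parseCPol is the centerpiece of this patch — the separate glc/slc/dlc operands (plus the new scc) become one bitmask immediate, with the no*-forms clearing bits and CPolSeen catching duplicates. A self-contained sketch of the accumulation, again with assumed bit values in place of AMDGPU::CPol:

#include <string>

// Folds one modifier token into the running policy; returns false for an
// unknown token or a duplicate (mirroring the "duplicate cache policy
// modifier" diagnostic above).
bool foldCPolToken(const std::string &Tok, unsigned &Seen, unsigned &CPol) {
  const unsigned GLC = 1, SLC = 2, DLC = 4, SCC = 16;  // assumed values
  bool Clear = Tok.rfind("no", 0) == 0;       // "noglc" clears GLC
  const std::string Name = Clear ? Tok.substr(2) : Tok;
  unsigned Bit = Name == "glc" ? GLC : Name == "slc" ? SLC
               : Name == "dlc" ? DLC : Name == "scc" ? SCC : 0;
  if (!Bit || (Seen & Bit))
    return false;
  Seen |= Bit;
  CPol = Clear ? (CPol & ~Bit) : (CPol | Bit);
  return true;
}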
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@@ -5757,7 +6245,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
}
return false;
}
- if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+ if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
Error(Op.Loc, "invalid operation id");
return false;
}
@@ -5765,7 +6253,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
Error(Stream.Loc, "message operation does not support streams");
return false;
}
- if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+ if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
Error(Stream.Loc, "invalid message stream id");
return false;
}
@@ -5934,6 +6422,18 @@ AMDGPUAsmParser::trySkipId(const StringRef Id) {
}
bool
+AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
+ if (isToken(AsmToken::Identifier)) {
+ StringRef Tok = getTokenStr();
+ if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
+ lex();
+ return true;
+ }
+ }
+ return false;
+}
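Annotation (sketch, not part of the commit): the new two-argument trySkipId matches a token composed of exactly a prefix plus an id (how the no*-modifiers such as "noglc" are recognized); equivalently:

#include <string>

// True iff Tok == Pref + Id with no extra characters.
bool isPrefixedId(const std::string &Tok, const std::string &Pref,
                  const std::string &Id) {
  return Tok.size() == Pref.size() + Id.size() &&
         Tok.compare(0, Pref.size(), Pref) == 0 &&
         Tok.compare(Pref.size(), Id.size(), Id) == 0;
}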
+
+bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
if (isId(Id) && peekToken().is(Kind)) {
lex();
@@ -6489,32 +6989,38 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
// mubuf
//===----------------------------------------------------------------------===//
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
- return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
- const OperandVector &Operands,
- bool IsAtomic,
- bool IsAtomicReturn,
- bool IsLds) {
+ const OperandVector &Operands,
+ bool IsAtomic,
+ bool IsLds) {
bool IsLdsOpcode = IsLds;
bool HasLdsModifier = false;
OptionalImmIndexMap OptionalIdx;
- assert(IsAtomicReturn ? IsAtomic : true);
unsigned FirstOperandIdx = 1;
+ bool IsAtomicReturn = false;
+
+ if (IsAtomic) {
+ for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (!Op.isCPol())
+ continue;
+ IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+ break;
+ }
+
+ if (!IsAtomicReturn) {
+ int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+ if (NewOpc != -1)
+ Inst.setOpcode(NewOpc);
+ }
+
+ IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+ SIInstrFlags::IsAtomicRet;
+ }
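Annotation (sketch, not part of the commit): with the dedicated cvtMubufAtomicReturn converter gone, return-vs-no-return is derived from the parsed cpol — glc present keeps the returning form, otherwise the opcode is swapped for its no-return twin when one exists (the converter then re-reads IsAtomicRet from the chosen opcode's flags). Treating getAtomicNoRetOp as a lookup that may fail:

// Pick the opcode to emit based on the glc bit of the cpol operand.
int selectAtomicOpcode(int Opc, unsigned CPol, int NoRetOpc /* -1 if none */) {
  const unsigned GLC = 1;            // assumed CPol::GLC value
  if (!(CPol & GLC) && NoRetOpc != -1)
    return NoRetOpc;                 // no glc -> no-return form
  return Opc;
}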
for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -6565,18 +7071,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
- if (!IsAtomic || IsAtomicReturn) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
- IsAtomicReturn ? -1 : 0);
- }
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
-
- if (isGFX10Plus())
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -6611,12 +7111,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
-
- if (isGFX10Plus())
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
//===----------------------------------------------------------------------===//
@@ -6658,14 +7155,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
- if (IsGFX10Plus)
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
if (!IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -6676,6 +7171,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
cvtMIMG(Inst, Operands, true);
}
+void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+ bool IsAtomicReturn = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (!Op.isCPol())
+ continue;
+ IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+ break;
+ }
+
+ if (!IsAtomicReturn) {
+ int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+ if (NewOpc != -1)
+ Inst.setOpcode(NewOpc);
+ }
+
+ IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+ SIInstrFlags::IsAtomicRet;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ if (IsAtomicReturn && i == 1)
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ if ((int)Inst.getNumOperands() <=
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+}
+
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
const OperandVector &Operands) {
for (unsigned I = 1; I < Operands.size(); ++I) {
@@ -6747,17 +7297,14 @@ static bool ConvertOmodDiv(int64_t &Div) {
return false;
}
+// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
+// This is intentional and ensures compatibility with sp3.
+// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
- if (BoundCtrl == 0) {
+ if (BoundCtrl == 0 || BoundCtrl == 1) {
BoundCtrl = 1;
return true;
}
-
- if (BoundCtrl == -1) {
- BoundCtrl = 0;
- return true;
- }
-
return false;
}
@@ -6772,9 +7319,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
- {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
- {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
+ {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
{"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -6808,6 +7353,18 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
+void AMDGPUAsmParser::onBeginOfFile() {
+ if (!getParser().getStreamer().getTargetStreamer() ||
+ getSTI().getTargetTriple().getArch() == Triple::r600)
+ return;
+
+ if (!getTargetStreamer().getTargetID())
+ getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
+
+ if (isHsaAbiVersion3Or4(&getSTI()))
+ getTargetStreamer().EmitDirectiveAMDGCNTarget();
+}
+
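// Illustrative sketch, not part of this patch: for HSA code-object v3/v4
// inputs this hook runs before the first statement is parsed, so assembly
// behaves as if the source began with a target directive such as the
// following (target string assumed for illustration):
//
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"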
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
OperandMatchResultTy res = parseOptionalOpr(Operands);
@@ -6857,6 +7414,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.ConvertResult);
} else if (Op.Type == AMDGPUOperand::ImmTyDim) {
res = parseDim(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+ res = parseCPol(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -7010,6 +7569,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
+ Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
@@ -7028,16 +7588,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3(Inst, Operands, OptionalIdx);
}
-void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
- const OperandVector &Operands) {
- OptionalImmIndexMap OptIdx;
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptIdx) {
const int Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
- cvtVOP3(Inst, Operands, OptIdx);
-
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
@@ -7046,7 +7603,10 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
// instruction, and then figure out where to actually put the modifiers
- addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ if (OpSelIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+ }
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
if (OpSelHiIdx != -1) {
@@ -7057,7 +7617,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
if (NegLoIdx != -1) {
- assert(IsPacked);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
}
@@ -7069,16 +7628,16 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers };
- int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
-
- unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ unsigned OpSel = 0;
unsigned OpSelHi = 0;
unsigned NegLo = 0;
unsigned NegHi = 0;
- if (OpSelHiIdx != -1) {
+ if (OpSelIdx != -1)
+ OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ if (OpSelHiIdx != -1)
OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
- }
if (NegLoIdx != -1) {
int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
@@ -7111,6 +7670,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
}
}
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptIdx;
+ cvtVOP3(Inst, Operands, OptIdx);
+ cvtVOP3P(Inst, Operands, OptIdx);
+}
+
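// Sketch of the refactoring's intent; cvtSomeVOP3P is hypothetical and
// not in this patch. Taking OptIdx by reference turns the VOP3P tail into
// a shared building block that other converters can run after their own
// prologue:
//
//   void AMDGPUAsmParser::cvtSomeVOP3P(MCInst &Inst,
//                                      const OperandVector &Operands) {
//     OptionalImmIndexMap OptIdx;
//     cvtVOP3(Inst, Operands, OptIdx);  // or any custom prologue
//     cvtVOP3P(Inst, Operands, OptIdx); // shared op_sel/neg_* handling
//   }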
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
@@ -7167,44 +7732,64 @@ bool AMDGPUOperand::isU16Imm() const {
return isImm() && isUInt<16>(getImm());
}
-OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
- if (!isGFX10Plus())
- return MatchOperand_NoMatch;
-
- SMLoc S = getLoc();
-
- if (!trySkipId("dim", AsmToken::Colon))
- return MatchOperand_NoMatch;
+//===----------------------------------------------------------------------===//
+// dim
+//===----------------------------------------------------------------------===//
- // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
- // integer.
+bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
+ // We want to allow "dim:1D" etc.,
+ // but the initial 1 is tokenized as an integer.
std::string Token;
if (isToken(AsmToken::Integer)) {
SMLoc Loc = getToken().getEndLoc();
Token = std::string(getTokenStr());
lex();
if (getLoc() != Loc)
- return MatchOperand_ParseFail;
+ return false;
}
- if (!isToken(AsmToken::Identifier))
- return MatchOperand_ParseFail;
- Token += getTokenStr();
+
+ StringRef Suffix;
+ if (!parseId(Suffix))
+ return false;
+ Token += Suffix;
StringRef DimId = Token;
if (DimId.startswith("SQ_RSRC_IMG_"))
- DimId = DimId.substr(12);
+ DimId = DimId.drop_front(12);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
if (!DimInfo)
- return MatchOperand_ParseFail;
+ return false;
+
+ Encoding = DimInfo->Encoding;
+ return true;
+}
- lex();
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+ if (!isGFX10Plus())
+ return MatchOperand_NoMatch;
- Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+ SMLoc S = getLoc();
+
+ if (!trySkipId("dim", AsmToken::Colon))
+ return MatchOperand_NoMatch;
+
+ unsigned Encoding;
+ SMLoc Loc = getLoc();
+ if (!parseDimId(Encoding)) {
+ Error(Loc, "invalid dim value");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
AMDGPUOperand::ImmTyDim));
return MatchOperand_Success;
}
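// Illustrative sketch, not part of this patch: "dim:1D" lexes as the
// integer 1 followed by the identifier D; parseDimId() re-joins the two
// tokens, using their source locations to reject intervening whitespace.
// Both accepted spellings map to the same MIMG dim encoding:
//
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D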
+//===----------------------------------------------------------------------===//
+// dpp
+//===----------------------------------------------------------------------===//
+
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
SMLoc S = getLoc();
@@ -7245,6 +7830,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
const OperandVector &Operands) {
+ if (Ctrl == "row_newbcast")
+ return isGFX90A();
+
if (Ctrl == "row_share" ||
Ctrl == "row_xmask")
return isGFX10Plus();
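// Illustrative sketch, not part of this patch: row_newbcast is the DPP
// control accepted only on gfx90a by the check above; syntax assumed
// analogous to row_share:
//
//   v_mov_b32 v0, v1 row_newbcast:1 row_mask:0xf bank_mask:0xf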
@@ -7322,6 +7910,7 @@ AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
.Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
.Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
.Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
+ .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
.Default({-1, 0, 0});
bool Valid;
@@ -7400,6 +7989,9 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
+ unsigned Opc = Inst.getOpcode();
+ bool HasModifiers =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -7426,7 +8018,8 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
if (IsDPP8) {
if (Op.isDPP8()) {
Op.addImmOperands(Inst, 1);
- } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ } else if (HasModifiers &&
+ isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
} else if (Op.isFI()) {
Fi = Op.getImm();
@@ -7436,8 +8029,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
llvm_unreachable("Invalid operand type");
}
} else {
- if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ if (HasModifiers &&
+ isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
} else if (Op.isDPPCtrl()) {
Op.addImmOperands(Inst, 1);
} else if (Op.isImm()) {
@@ -7691,8 +8287,6 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
case MCK_lds:
return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
- case MCK_glc:
- return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
case MCK_idxen:
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen: