| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1072 |
1 file changed, 833 insertions(+), 239 deletions(-)
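The bulk of this import replaces the separate dlc/glc/slc immediate operands with a single cache-policy (cpol) bitmask operand and threads gfx90a support through the parser. As a rough illustration of the merging scheme used by the new `parseCPol`/`CPolSeen` code in the diff below, here is a standalone sketch — not the LLVM sources; `CPolParser`, its `parse` method, and the simplified bit values are illustrative only, while the `CPol::GLC`-style names mirror the patch:

```cpp
// Minimal sketch of the cache-policy merging scheme this patch introduces:
// each modifier ("glc", "slc", "dlc", "scc", or its "no"-prefixed negation)
// toggles one bit in a single cpol immediate, and a "seen" mask rejects
// duplicates (including "glc" followed by "noglc", as in the patch).
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

namespace CPol {
enum : unsigned { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2, SCC = 1u << 3 };
}

struct CPolParser {
  unsigned Seen = 0; // reset per instruction, like CPolSeen in the patch

  // Returns the merged policy bits, or nullopt on a duplicate/unknown modifier.
  std::optional<unsigned> parse(const std::vector<std::string> &Mods) {
    unsigned Policy = 0;
    for (const std::string &Tok : Mods) {
      bool Negated = Tok.rfind("no", 0) == 0;      // "noglc" clears the bit
      std::string Name = Negated ? Tok.substr(2) : Tok;
      unsigned Bit = Name == "glc"   ? CPol::GLC
                     : Name == "slc" ? CPol::SLC
                     : Name == "dlc" ? CPol::DLC
                     : Name == "scc" ? CPol::SCC
                                     : 0;
      if (!Bit || (Seen & Bit))
        return std::nullopt;                       // unknown or duplicate
      Seen |= Bit;
      Policy = Negated ? (Policy & ~Bit) : (Policy | Bit);
    }
    return Policy;
  }
};

int main() {
  CPolParser P;
  if (auto Pol = P.parse({"glc", "slc"}))
    std::cout << "cpol bits: " << *Pol << "\n";    // prints 3 (GLC|SLC)
  CPolParser Q;
  if (!Q.parse({"glc", "noglc"}))
    std::cout << "duplicate policy rejected\n";
}
```

The design point this models: because all policy modifiers accumulate into one operand, later passes (e.g. the atomic return/no-return opcode selection in `cvtMubufImpl` and `cvtSMEMAtomic` below) can test a single immediate instead of three separate operands.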
```diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index af4a47935e3f..00032c7d4ea5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,6 +11,7 @@
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -113,9 +114,7 @@ public:
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
-    ImmTyDLC,
-    ImmTyGLC,
-    ImmTySLC,
+    ImmTyCPol,
     ImmTySWZ,
     ImmTyTFE,
     ImmTyD16,
@@ -299,6 +298,8 @@ public:
     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
   }
 
+  bool isVRegWithInputMods() const;
+
   bool isSDWAOperand(MVT type) const;
   bool isSDWAFP16Operand() const;
   bool isSDWAFP32Operand() const;
@@ -336,12 +337,7 @@ public:
   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
-  bool isDLC() const { return isImmTy(ImmTyDLC); }
-  bool isGLC() const { return isImmTy(ImmTyGLC); }
-  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
-  // value of the GLC operand.
-  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
-  bool isSLC() const { return isImmTy(ImmTySLC); }
+  bool isCPol() const { return isImmTy(ImmTyCPol); }
   bool isSWZ() const { return isImmTy(ImmTySWZ); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
   bool isD16() const { return isImmTy(ImmTyD16); }
@@ -449,6 +445,26 @@ public:
     return isSSrcF16();
   }
 
+  bool isSSrcV2FP32() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcF32();
+  }
+
+  bool isSCSrcV2FP32() const {
+    llvm_unreachable("cannot happen");
+    return isSCSrcF32();
+  }
+
+  bool isSSrcV2INT32() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcB32();
+  }
+
+  bool isSCSrcV2INT32() const {
+    llvm_unreachable("cannot happen");
+    return isSCSrcB32();
+  }
+
   bool isSSrcOrLdsB32() const {
     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
            isLiteralImm(MVT::i32) || isExpr();
@@ -502,6 +518,22 @@ public:
     return isVSrcB16() || isLiteralImm(MVT::v2i16);
   }
 
+  bool isVCSrcV2FP32() const {
+    return isVCSrcF64();
+  }
+
+  bool isVSrcV2FP32() const {
+    return isVSrcF64() || isLiteralImm(MVT::v2f32);
+  }
+
+  bool isVCSrcV2INT32() const {
+    return isVCSrcB64();
+  }
+
+  bool isVSrcV2INT32() const {
+    return isVSrcB64() || isLiteralImm(MVT::v2i32);
+  }
+
   bool isVSrcF32() const {
     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
   }
@@ -542,6 +574,102 @@ public:
     return isVISrcF16() || isVISrcB32();
   }
 
+  bool isVISrc_64B64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
+  }
+
+  bool isVISrc_64F64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
+  }
+
+  bool isVISrc_64V2FP32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_64V2INT32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_256B64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
+  }
+
+  bool isVISrc_256F64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
+  }
+
+  bool isVISrc_128B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_128V2B16() const {
+    return isVISrc_128B16();
+  }
+
+  bool isVISrc_128B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_128F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_256V2FP32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_256V2INT32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_512B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_512B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_512V2B16() const {
+    return isVISrc_512B16();
+  }
+
+  bool isVISrc_512F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_512F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_512V2F16() const {
+    return isVISrc_512F16() || isVISrc_512B32();
+  }
+
+  bool isVISrc_1024B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_1024B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_1024V2B16() const {
+    return isVISrc_1024B16();
+  }
+
+  bool isVISrc_1024F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_1024F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_1024V2F16() const {
+    return isVISrc_1024F16() || isVISrc_1024B32();
+  }
+
   bool isAISrcB32() const {
     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
   }
@@ -566,6 +694,14 @@ public:
     return isAISrcF16() || isAISrcB32();
   }
 
+  bool isAISrc_64B64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
+  }
+
+  bool isAISrc_64F64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
+  }
+
   bool isAISrc_128B32() const {
     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
   }
@@ -590,6 +726,22 @@ public:
     return isAISrc_128F16() || isAISrc_128B32();
   }
 
+  bool isVISrc_128F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_128V2F16() const {
+    return isVISrc_128F16() || isVISrc_128B32();
+  }
+
+  bool isAISrc_256B64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
+  }
+
+  bool isAISrc_256F64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
+  }
+
   bool isAISrc_512B32() const {
     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
   }
@@ -837,9 +989,7 @@ public:
     case ImmTyInstOffset: OS << "InstOffset"; break;
     case ImmTyOffset0: OS << "Offset0"; break;
     case ImmTyOffset1: OS << "Offset1"; break;
-    case ImmTyDLC: OS << "DLC"; break;
-    case ImmTyGLC: OS << "GLC"; break;
-    case ImmTySLC: OS << "SLC"; break;
+    case ImmTyCPol: OS << "CPol"; break;
     case ImmTySWZ: OS << "SWZ"; break;
     case ImmTyTFE: OS << "TFE"; break;
     case ImmTyD16: OS << "D16"; break;
@@ -1021,6 +1171,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ForcedDPP = false;
   bool ForcedSDWA = false;
   KernelScopeInfo KernelScope;
+  unsigned CPolSeen;
 
   /// @name Auto-generated Match Functions
   /// {
@@ -1061,7 +1212,8 @@ private:
   bool ParseDirectiveHSACodeObjectISA();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
-  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+  // TODO: Possibly make subtargetHasRegister const.
+  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
   bool ParseDirectiveAMDGPUHsaKernel();
 
   bool ParseDirectiveISAVersion();
@@ -1105,7 +1257,7 @@ private:
   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                              unsigned RegWidth);
   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
-                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
+                    bool IsAtomic, bool IsLds = false);
   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                  bool IsGdsHardcoded);
@@ -1140,7 +1292,7 @@ public:
     // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
     MCContext &Ctx = getContext();
-    if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
       MCSymbol *Sym =
           Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1157,7 +1309,7 @@ public:
       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
     }
-    if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
       initializeGprCountSymbol(IS_VGPR);
       initializeGprCountSymbol(IS_SGPR);
     } else
@@ -1165,10 +1317,6 @@ public:
     }
   }
 
-  bool hasXNACK() const {
-    return AMDGPU::hasXNACK(getSTI());
-  }
-
   bool hasMIMG_R128() const {
     return AMDGPU::hasMIMG_R128(getSTI());
   }
@@ -1181,6 +1329,8 @@ public:
     return AMDGPU::hasGFX10A16(getSTI());
   }
 
+  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -1197,6 +1347,10 @@ public:
     return AMDGPU::isGFX9(getSTI());
   }
 
+  bool isGFX90A() const {
+    return AMDGPU::isGFX90A(getSTI());
+  }
+
   bool isGFX9Plus() const {
     return AMDGPU::isGFX9Plus(getSTI());
   }
@@ -1219,6 +1373,10 @@ public:
     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
   }
 
+  bool hasArchitectedFlatScratch() const {
+    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+  }
+
   bool hasSGPR102_SGPR103() const {
     return !isVI() && !isGFX9();
   }
@@ -1294,8 +1452,9 @@ public:
                  bool (*ConvertResult)(int64_t&) = nullptr);
 
   OperandMatchResultTy
-  parseNamedBit(const char *Name, OperandVector &Operands,
+  parseNamedBit(StringRef Name, OperandVector &Operands,
                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+  OperandMatchResultTy parseCPol(OperandVector &Operands);
   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                              StringRef &Value,
                                              SMLoc &StringLoc);
@@ -1379,14 +1538,19 @@ private:
   bool validateMIMGAddrSize(const MCInst &Inst);
   bool validateMIMGD16(const MCInst &Inst);
   bool validateMIMGDim(const MCInst &Inst);
-  bool validateLdsDirect(const MCInst &Inst);
+  bool validateMIMGMSAA(const MCInst &Inst);
   bool validateOpSel(const MCInst &Inst);
+  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
   bool validateVccOperand(unsigned Reg) const;
   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+  bool validateAGPRLdSt(const MCInst &Inst) const;
+  bool validateVGPRAlign(const MCInst &Inst) const;
+  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
   bool validateDivScale(const MCInst &Inst);
   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                              const SMLoc &IDLoc);
+  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
   unsigned getConstantBusLimit(unsigned Opcode) const;
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1403,6 +1567,7 @@ private:
   bool isId(const AsmToken &Token, const StringRef Id) const;
   bool isToken(const AsmToken::TokenKind Kind) const;
   bool trySkipId(const StringRef Id);
+  bool trySkipId(const StringRef Pref, const StringRef Id);
   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
   bool trySkipToken(const AsmToken::TokenKind Kind);
   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
@@ -1420,6 +1585,8 @@ private:
   void lex();
 
 public:
+  void onBeginOfFile() override;
+
   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
@@ -1451,16 +1618,12 @@ public:
   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
   int64_t parseGPRIdxMacro();
 
-  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
-  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
-  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
-  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
+  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
+  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
+  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
-  AMDGPUOperand::Ptr defaultDLC() const;
-  AMDGPUOperand::Ptr defaultGLC() const;
-  AMDGPUOperand::Ptr defaultGLC_1() const;
-  AMDGPUOperand::Ptr defaultSLC() const;
+  AMDGPUOperand::Ptr defaultCPol() const;
 
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -1474,6 +1637,8 @@ public:
   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+                OptionalImmIndexMap &OptionalIdx);
 
   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
 
@@ -1482,6 +1647,9 @@ public:
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
 
+  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
+
+  bool parseDimId(unsigned &Encoding);
   OperandMatchResultTy parseDim(OperandVector &Operands);
   OperandMatchResultTy parseDPP8(OperandVector &Operands);
   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
@@ -1551,11 +1719,16 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32:
     return &APFloat::IEEEsingle();
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     return &APFloat::IEEEdouble();
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
@@ -1715,7 +1888,8 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
   // literal goes into the lower half and the upper half is zero. We also
   // require that the literal may be losslesly converted to f16.
   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
-                     (type == MVT::v2i16)? MVT::i16 : type;
+                     (type == MVT::v2i16)? MVT::i16 :
+                     (type == MVT::v2f32)? MVT::f32 : type;
 
   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
@@ -1725,6 +1899,13 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
 }
 
+bool AMDGPUOperand::isVRegWithInputMods() const {
+  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
+         // GFX90A allows DPP on 64-bit operands.
+         (isRegClass(AMDGPU::VReg_64RegClassID) &&
+          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+}
+
 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
   if (AsmParser->isVI())
     return isVReg32();
@@ -1751,8 +1932,9 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
 }
 
 bool AMDGPUOperand::isBoolReg() const {
-  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
-         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
+  auto FB = AsmParser->getFeatureBits();
+  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
+                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
 }
 
 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -1806,6 +1988,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1849,7 +2032,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
-  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32: {
     bool lost;
     APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
     // Convert literal to single precision
@@ -1881,6 +2068,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
     if (isSafeTruncation(Val, 32) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1897,6 +2088,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       setImmKindConst();
@@ -2000,6 +2192,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::VReg_128RegClassID;
     case 5: return AMDGPU::VReg_160RegClassID;
     case 6: return AMDGPU::VReg_192RegClassID;
+    case 7: return AMDGPU::VReg_224RegClassID;
     case 8: return AMDGPU::VReg_256RegClassID;
     case 16: return AMDGPU::VReg_512RegClassID;
     case 32: return AMDGPU::VReg_1024RegClassID;
@@ -2022,6 +2215,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::SGPR_128RegClassID;
     case 5: return AMDGPU::SGPR_160RegClassID;
     case 6: return AMDGPU::SGPR_192RegClassID;
+    case 7: return AMDGPU::SGPR_224RegClassID;
     case 8: return AMDGPU::SGPR_256RegClassID;
     case 16: return AMDGPU::SGPR_512RegClassID;
   }
@@ -2034,6 +2228,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::AReg_128RegClassID;
     case 5: return AMDGPU::AReg_160RegClassID;
     case 6: return AMDGPU::AReg_192RegClassID;
+    case 7: return AMDGPU::AReg_224RegClassID;
     case 8: return AMDGPU::AReg_256RegClassID;
     case 16: return AMDGPU::AReg_512RegClassID;
     case 32: return AMDGPU::AReg_1024RegClassID;
@@ -2529,7 +2724,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
     return nullptr;
   }
-  if (isHsaAbiVersion3(&getSTI())) {
+  if (isHsaAbiVersion3Or4(&getSTI())) {
     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
       return nullptr;
   } else
@@ -3200,7 +3395,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
     return true;
 
   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
-  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
+  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
   if (DMask == 0)
     DMask = 1;
@@ -3230,6 +3425,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
 
   assert(VAddr0Idx != -1);
   assert(SrsrcIdx != -1);
@@ -3241,22 +3437,26 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
   unsigned Dim = Inst.getOperand(DimIdx).getImm();
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
-  unsigned VAddrSize =
+  unsigned ActualAddrSize =
       IsNSA ? SrsrcIdx - VAddr0Idx
             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
+
+  unsigned ExpectedAddrSize =
+      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
 
-  unsigned AddrSize = BaseOpcode->NumExtraArgs +
-                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
-                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
-                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);
   if (!IsNSA) {
-    if (AddrSize > 8)
-      AddrSize = 16;
-    else if (AddrSize > 4)
-      AddrSize = 8;
+    if (ExpectedAddrSize > 8)
+      ExpectedAddrSize = 16;
+
+    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
+    // This provides backward compatibility for assembly created
+    // before 160b/192b/224b types were directly supported.
+    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
+      return true;
   }
 
-  return VAddrSize == AddrSize;
+  return ActualAddrSize == ExpectedAddrSize;
 }
 
 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
@@ -3298,6 +3498,29 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
 }
 
+bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
+  if (!BaseOpcode->MSAA)
+    return true;
+
+  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  assert(DimIdx != -1);
+
+  unsigned Dim = Inst.getOperand(DimIdx).getImm();
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+
+  return DimInfo->MSAA;
+}
+
 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
 {
   switch (Opcode) {
@@ -3559,7 +3782,7 @@ static bool IsRevOpcode(const unsigned Opcode)
   }
 }
 
-bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
   using namespace SIInstrFlags;
   const unsigned Opcode = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opcode);
 
   // lds_direct register is defined so that it can be used
   // with 9-bit operands only. Ignore encodings which do not accept these.
-  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
-    return true;
+  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
+  if ((Desc.TSFlags & Enc) == 0)
+    return None;
 
-  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
-  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
-  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
+    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
+    if (SrcIdx == -1)
+      break;
+    const auto &Src = Inst.getOperand(SrcIdx);
+    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
 
-  const int SrcIndices[] = { Src1Idx, Src2Idx };
+      if (isGFX90A())
+        return StringRef("lds_direct is not supported on this GPU");
 
-  // lds_direct cannot be specified as either src1 or src2.
-  for (int SrcIdx : SrcIndices) {
-    if (SrcIdx == -1) break;
-    const MCOperand &Src = Inst.getOperand(SrcIdx);
-    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
-      return false;
+      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
+        return StringRef("lds_direct cannot be used with this instruction");
+
+      if (SrcName != OpName::src0)
+        return StringRef("lds_direct may be used as src0 only");
     }
   }
 
-  if (Src0Idx == -1)
-    return true;
-
-  const MCOperand &Src = Inst.getOperand(Src0Idx);
-  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
-    return true;
-
-  // lds_direct is specified as src0. Check additional limitations.
-  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
+  return None;
 }
 
 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
@@ -3624,7 +3843,7 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
 
   // For FLAT segment the offset must be positive;
   // MSB is ignored and forced to zero.
-  if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
+  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
     if (!isIntN(OffsetSize, Op.getImm())) {
       Error(getFlatOffsetLoc(Operands),
@@ -3733,6 +3952,28 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
   return true;
 }
 
+bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
+                                  const OperandVector &Operands) {
+  const unsigned Opc = Inst.getOpcode();
+  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
+  if (DppCtrlIdx < 0)
+    return true;
+  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+
+  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
+    // DPP64 is supported for row_newbcast only.
+    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+    if (Src0Idx >= 0 &&
+        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
+      Error(S, "64 bit dpp only supports row_newbcast");
+      return false;
+    }
+  }
+
+  return true;
+}
+
 // Check if VCC register matches wavefront size
 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
   auto FB = getFeatureBits();
@@ -3802,18 +4043,148 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
   return true;
 }
 
+// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
+static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
+                         const MCRegisterInfo *MRI) {
+  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
+  if (OpIdx < 0)
+    return -1;
+
+  const MCOperand &Op = Inst.getOperand(OpIdx);
+  if (!Op.isReg())
+    return -1;
+
+  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+  auto Reg = Sub ? Sub : Op.getReg();
+  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+  return AGPR32.contains(Reg) ? 1 : 0;
+}
+
+bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
+  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
+                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
+                  SIInstrFlags::DS)) == 0)
+    return true;
+
+  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
+                                                      : AMDGPU::OpName::vdata;
+
+  const MCRegisterInfo *MRI = getMRI();
+  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
+  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
+
+  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
+    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
+    if (Data2Areg >= 0 && Data2Areg != DataAreg)
+      return false;
+  }
+
+  auto FB = getFeatureBits();
+  if (FB[AMDGPU::FeatureGFX90AInsts]) {
+    if (DataAreg < 0 || DstAreg < 0)
+      return true;
+    return DstAreg == DataAreg;
+  }
+
+  return DstAreg < 1 && DataAreg < 1;
+}
+
+bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
+  auto FB = getFeatureBits();
+  if (!FB[AMDGPU::FeatureGFX90AInsts])
+    return true;
+
+  const MCRegisterInfo *MRI = getMRI();
+  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+    const MCOperand &Op = Inst.getOperand(I);
+    if (!Op.isReg())
+      continue;
+
+    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+    if (!Sub)
+      continue;
+
+    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
+      return false;
+    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
+      return false;
+  }
+
+  return true;
+}
+
+// gfx90a has an undocumented limitation:
+// DS_GWS opcodes must use even aligned registers.
+bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
+                                  const OperandVector &Operands) {
+  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
+    return true;
+
+  int Opc = Inst.getOpcode();
+  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
+      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
+    return true;
+
+  const MCRegisterInfo *MRI = getMRI();
+  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+  int Data0Pos =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
+  assert(Data0Pos != -1);
+  auto Reg = Inst.getOperand(Data0Pos).getReg();
+  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
+  if (RegIdx & 1) {
+    SMLoc RegLoc = getRegLoc(Reg, Operands);
+    Error(RegLoc, "vgpr must be even aligned");
+    return false;
+  }
+
+  return true;
+}
+
 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const SMLoc &IDLoc) {
-  int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
-                                          AMDGPU::OpName::glc1);
-  if (GLCPos != -1) {
-    // -1 is set by GLC_1 default operand. In all cases "glc" must be present
-    // in the asm string, and the default value means it is not present.
-    if (Inst.getOperand(GLCPos).getImm() == -1) {
+  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                           AMDGPU::OpName::cpol);
+  if (CPolPos == -1)
+    return true;
+
+  unsigned CPol = Inst.getOperand(CPolPos).getImm();
+
+  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & (SIInstrFlags::SMRD)) &&
+      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
+    Error(IDLoc, "invalid cache policy for SMRD instruction");
+    return false;
+  }
+
+  if (isGFX90A() && (CPol & CPol::SCC)) {
+    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+    StringRef CStr(S.getPointer());
+    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+    Error(S, "scc is not supported on this GPU");
+    return false;
+  }
+
+  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
+    return true;
+
+  if (TSFlags & SIInstrFlags::IsAtomicRet) {
+    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
       Error(IDLoc, "instruction must use glc");
       return false;
     }
+  } else {
+    if (CPol & CPol::GLC) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
+      Error(S, "instruction must not use glc");
+      return false;
+    }
   }
 
   return true;
@@ -3822,9 +4193,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                           const SMLoc &IDLoc,
                                           const OperandVector &Operands) {
-  if (!validateLdsDirect(Inst)) {
-    Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
-      "invalid use of lds_direct");
+  if (auto ErrMsg = validateLdsDirect(Inst)) {
+    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
     return false;
   }
   if (!validateSOPLiteral(Inst)) {
@@ -3851,6 +4221,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "invalid op_sel operand");
     return false;
   }
+  if (!validateDPP(Inst, Operands)) {
+    return false;
+  }
   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
   if (!validateMIMGD16(Inst)) {
     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
@@ -3861,6 +4234,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
     Error(IDLoc, "dim modifier is required on this GPU");
     return false;
   }
+  if (!validateMIMGMSAA(Inst)) {
+    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
+          "invalid dim; must be MSAA type");
+    return false;
+  }
   if (!validateMIMGDataSize(Inst)) {
     Error(IDLoc,
       "image data size does not match dmask and tfe");
@@ -3893,6 +4271,26 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateMAIAccWrite(Inst, Operands)) {
     return false;
   }
+  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
+    return false;
+  }
+
+  if (!validateAGPRLdSt(Inst)) {
+    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
+                     ? "invalid register class: data and dst should be all VGPR or AGPR"
+                     : "invalid register class: agpr loads and stores not supported on this GPU"
+    );
+    return false;
+  }
+  if (!validateVGPRAlign(Inst)) {
+    Error(IDLoc,
+      "invalid register class: vgpr tuples must be 64 bit aligned");
+    return false;
+  }
+  if (!validateGWS(Inst, Operands)) {
+    return false;
+  }
+
   if (!validateDivScale(Inst)) {
     Error(IDLoc, "ABS not allowed in VOP3B instructions");
     return false;
@@ -4062,21 +4460,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
     return TokError("directive only supported for amdgcn architecture");
 
-  std::string Target;
-
-  SMLoc TargetStart = getLoc();
-  if (getParser().parseEscapedString(Target))
+  std::string TargetIDDirective;
+  SMLoc TargetStart = getTok().getLoc();
+  if (getParser().parseEscapedString(TargetIDDirective))
     return true;
-  SMRange TargetRange = SMRange(TargetStart, getLoc());
 
-  std::string ExpectedTarget;
-  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
-  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+    return getParser().Error(TargetRange.Start,
+        (Twine(".amdgcn_target directive's target id ") +
+         Twine(TargetIDDirective) +
+         Twine(" does not match the specified target id ") +
+         Twine(getTargetStreamer().getTargetID()->toString())).str());
 
-  if (Target != ExpectedTargetOS.str())
-    return Error(TargetRange.Start, "target must match options", TargetRange);
-
-  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
   return false;
 }
 
@@ -4143,12 +4539,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
 
   SMRange VGPRRange;
   uint64_t NextFreeVGPR = 0;
+  uint64_t AccumOffset = 0;
   SMRange SGPRRange;
   uint64_t NextFreeSGPR = 0;
   unsigned UserSGPRCount = 0;
   bool ReserveVCC = true;
   bool ReserveFlatScr = true;
-  bool ReserveXNACK = hasXNACK();
   Optional<bool> EnableWavefrontSize32;
 
   while (true) {
@@ -4191,7 +4587,15 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
+    } else if (ID == ".amdhsa_kernarg_size") {
+      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+        return OutOfRangeError(ValRange);
+      KD.kernarg_size = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                        Val, ValRange);
@@ -4222,6 +4626,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (Val)
         UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
                        Val, ValRange);
@@ -4241,10 +4649,20 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                        Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
-      PARSE_BITS_ENTRY(
-          KD.compute_pgm_rsrc2,
-          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
-          ValRange);
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+    } else if (ID == ".amdhsa_enable_private_segment") {
+      if (!hasArchitectedFlatScratch())
+        return Error(
+            IDRange.Start,
+            "directive is not supported without architected flat scratch",
+            IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
@@ -4271,6 +4689,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_next_free_sgpr") {
       SGPRRange = ValRange;
       NextFreeSGPR = Val;
+    } else if (ID == ".amdhsa_accum_offset") {
+      if (!isGFX90A())
+        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      AccumOffset = Val;
     } else if (ID == ".amdhsa_reserve_vcc") {
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveVCC = Val;
     } else if (ID == ".amdhsa_reserve_flat_scratch") {
       if (IVersion.Major < 7)
         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveFlatScr = Val;
@@ -4286,7 +4712,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
-      ReserveXNACK = Val;
+      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
+        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
+                                 IDRange);
     } else if (ID == ".amdhsa_float_round_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
@@ -4311,6 +4739,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                        ValRange);
+    } else if (ID == ".amdhsa_tg_split") {
+      if (!isGFX90A())
+        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
+                       ValRange);
     } else if (ID == ".amdhsa_workgroup_processor_mode") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4372,7 +4805,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   unsigned VGPRBlocks;
   unsigned SGPRBlocks;
   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
-                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
+                         EnableWavefrontSize32, NextFreeVGPR,
                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                          SGPRBlocks))
     return true;
@@ -4395,9 +4829,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                   UserSGPRCount);
 
+  if (isGFX90A()) {
+    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
+      return TokError(".amdhsa_accum_offset directive is required");
+    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
+      return TokError("accum_offset should be in range [4..256] in "
+                      "increments of 4");
+    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
+      return TokError("accum_offset exceeds total VGPR allocation");
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
+                    (AccumOffset / 4 - 1));
+  }
+
   getTargetStreamer().EmitAmdhsaKernelDescriptor(
       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
-      ReserveFlatScr, ReserveXNACK);
+      ReserveFlatScr);
   return false;
 }
 
@@ -4423,9 +4869,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   // targeted GPU.
   if (isToken(AsmToken::EndOfStatement)) {
     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
-    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
-                                                      ISA.Stepping,
-                                                      "AMD", "AMDGPU");
+    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
+                                                        ISA.Stepping,
+                                                        "AMD", "AMDGPU");
     return false;
   }
 
@@ -4450,8 +4896,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   if (!parseString(ArchName, "invalid arch name"))
     return true;
 
-  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
-                                                    VendorName, ArchName);
+  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
+                                                      VendorName, ArchName);
   return false;
 }
 
@@ -4560,19 +5006,11 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
                  "architectures");
   }
 
-  auto ISAVersionStringFromASM = getToken().getStringContents();
+  auto TargetIDDirective = getLexer().getTok().getStringContents();
+  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+    return Error(getParser().getTok().getLoc(), "target id must match options");
 
-  std::string ISAVersionStringFromSTI;
-  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
-  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
-
-  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
-    return Error(getLoc(),
-                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
-                 "arguments specified through the command line");
-  }
-
-  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
+  getTargetStreamer().EmitISAVersion();
   Lex();
 
   return false;
@@ -4582,7 +5020,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
   const char *AssemblerDirectiveBegin;
   const char *AssemblerDirectiveEnd;
   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
-      isHsaAbiVersion3(&getSTI())
+      isHsaAbiVersion3Or4(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);
@@ -4599,7 +5037,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
                           HSAMetadataString))
     return true;
 
-  if (isHsaAbiVersion3(&getSTI())) {
+  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
@@ -4749,12 +5187,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
-  if (isHsaAbiVersion3(&getSTI())) {
-    if (IDVal == ".amdgcn_target")
-      return ParseDirectiveAMDGCNTarget();
-
+  if (isHsaAbiVersion3Or4(&getSTI())) {
     if (IDVal == ".amdhsa_kernel")
-     return ParseDirectiveAMDHSAKernel();
+      return ParseDirectiveAMDHSAKernel();
 
     // TODO: Restructure/combine with PAL metadata directive.
     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
@@ -4779,6 +5214,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
       return ParseDirectiveHSAMetadata();
   }
 
+  if (IDVal == ".amdgcn_target")
+    return ParseDirectiveAMDGCNTarget();
+
   if (IDVal == ".amdgpu_lds")
     return ParseDirectiveAMDGPULDS();
 
@@ -4792,7 +5230,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
 }
 
 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
-                                           unsigned RegNo) const {
+                                           unsigned RegNo) {
 
   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
        R.isValid(); ++R) {
@@ -4824,7 +5262,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   case AMDGPU::XNACK_MASK:
   case AMDGPU::XNACK_MASK_LO:
   case AMDGPU::XNACK_MASK_HI:
-    return (isVI() || isGFX9()) && hasXNACK();
+    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
   case AMDGPU::SGPR_NULL:
     return isGFX10Plus();
   default:
@@ -4881,16 +5319,21 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
 
     unsigned Prefix = Operands.size();
     for (;;) {
+      auto Loc = getLoc();
      ResTy = parseReg(Operands);
+      if (ResTy == MatchOperand_NoMatch)
+        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
-        return ResTy;
+        return MatchOperand_ParseFail;
 
      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;
 
-      if (!trySkipToken(AsmToken::Comma))
+      if (!skipToken(AsmToken::Comma,
+                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
+      }
    }
 
    if (Operands.size() - Prefix > 1) {
@@ -4940,11 +5383,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     OperandMode Mode = OperandMode_Default;
     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
       Mode = OperandMode_NSA;
+    CPolSeen = 0;
     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
 
-    // Eat the comma or space if there is one.
-    trySkipToken(AsmToken::Comma);
-
     if (Res != MatchOperand_Success) {
       checkUnsupportedInstruction(Name, NameLoc);
       if (!Parser.hasPendingError()) {
@@ -4959,6 +5400,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
       }
       return true;
     }
+
+    // Eat the comma or space if there is one.
+    trySkipToken(AsmToken::Comma);
   }
 
   return false;
@@ -5043,39 +5487,27 @@ AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                                AMDGPUOperand::ImmTy ImmTy) {
-  int64_t Bit = 0;
+  int64_t Bit;
   SMLoc S = getLoc();
 
-  // We are at the end of the statement, and this is a default argument, so
-  // use a default value.
-  if (!isToken(AsmToken::EndOfStatement)) {
-    switch(getTokenKind()) {
-      case AsmToken::Identifier: {
-        StringRef Tok = getTokenStr();
-        if (Tok == Name) {
-          if (Tok == "r128" && !hasMIMG_R128())
-            Error(S, "r128 modifier is not supported on this GPU");
-          if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
-            Error(S, "a16 modifier is not supported on this GPU");
-          Bit = 1;
-          Parser.Lex();
-        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
-          Bit = 0;
-          Parser.Lex();
-        } else {
-          return MatchOperand_NoMatch;
-        }
-        break;
-      }
-      default:
-        return MatchOperand_NoMatch;
-    }
+  if (trySkipId(Name)) {
+    Bit = 1;
+  } else if (trySkipId("no", Name)) {
+    Bit = 0;
+  } else {
+    return MatchOperand_NoMatch;
   }
 
-  if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
+  if (Name == "r128" && !hasMIMG_R128()) {
+    Error(S, "r128 modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
+    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
+  }
 
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;
@@ -5084,6 +5516,62 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
  return MatchOperand_Success;
 }
 
+OperandMatchResultTy
+AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+  unsigned CPolOn = 0;
+  unsigned CPolOff = 0;
+  SMLoc S = getLoc();
+
+  if (trySkipId("glc"))
+    CPolOn = AMDGPU::CPol::GLC;
+  else if (trySkipId("noglc"))
+    CPolOff = AMDGPU::CPol::GLC;
+  else if (trySkipId("slc"))
+    CPolOn = AMDGPU::CPol::SLC;
+  else if (trySkipId("noslc"))
+    CPolOff = AMDGPU::CPol::SLC;
+  else if (trySkipId("dlc"))
+    CPolOn = AMDGPU::CPol::DLC;
+  else if (trySkipId("nodlc"))
+    CPolOff = AMDGPU::CPol::DLC;
+  else if (trySkipId("scc"))
+    CPolOn = AMDGPU::CPol::SCC;
+  else if (trySkipId("noscc"))
+    CPolOff = AMDGPU::CPol::SCC;
+  else
+    return MatchOperand_NoMatch;
+
+  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
+    Error(S, "dlc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
+    Error(S, "scc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (CPolSeen & (CPolOn | CPolOff)) {
+    Error(S, "duplicate cache policy modifier");
+    return MatchOperand_ParseFail;
+  }
+
+  CPolSeen |= (CPolOn | CPolOff);
+
+  for (unsigned I = 1; I != Operands.size(); ++I) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+    if (Op.isCPol()) {
+      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
+      return MatchOperand_Success;
+    }
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
+                                              AMDGPUOperand::ImmTyCPol));
+
+  return MatchOperand_Success;
+}
+
 static void addOptionalImmOperand(
   MCInst& Inst, const OperandVector& Operands,
   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@@ -5757,7 +6245,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
     }
     return false;
   }
-  if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
@@ -5765,7 +6253,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
-  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
@@ -5934,6 +6422,18 @@ AMDGPUAsmParser::trySkipId(const StringRef Id) {
 }
 
 bool
+AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
+  if (isToken(AsmToken::Identifier)) {
+    StringRef Tok = getTokenStr();
+    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
+      lex();
+      return true;
+    }
+  }
+  return false;
+}
+
+bool
 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
   if (isId(Id) && peekToken().is(Kind)) {
     lex();
@@ -6489,32 +6989,38 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
 // mubuf
 //===----------------------------------------------------------------------===//
 
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
-  return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
 }
 
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
-                               const OperandVector &Operands,
-                               bool IsAtomic,
-                               bool IsAtomicReturn,
-                               bool IsLds) {
+                                   const OperandVector &Operands,
+                                   bool IsAtomic,
+                                   bool IsLds) {
   bool IsLdsOpcode = IsLds;
   bool HasLdsModifier = false;
   OptionalImmIndexMap OptionalIdx;
-  assert(IsAtomicReturn ? IsAtomic : true);
   unsigned FirstOperandIdx = 1;
+  bool IsAtomicReturn = false;
+
+  if (IsAtomic) {
+    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+      if (!Op.isCPol())
+        continue;
+      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+      break;
+    }
+
+    if (!IsAtomicReturn) {
+      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+      if (NewOpc != -1)
+        Inst.setOpcode(NewOpc);
+    }
+
+    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                     SIInstrFlags::IsAtomicRet;
+  }
 
   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -6565,18 +7071,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   }
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
-  if (!IsAtomic || IsAtomicReturn) {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
-                          IsAtomicReturn ? -1 : 0);
-  }
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
 
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
-
-  if (isGFX10Plus())
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -6611,12 +7111,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
   addOptionalImmOperand(Inst, Operands, OptionalIdx,
                         AMDGPUOperand::ImmTyOffset);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
-
-  if (isGFX10Plus())
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
 }
 
 //===----------------------------------------------------------------------===//
@@ -6658,14 +7155,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   if (IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
-  if (IsGFX10Plus)
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
   if (IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   if (!IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -6676,6 +7171,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
   cvtMIMG(Inst, Operands, true);
 }
 
+void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  bool IsAtomicReturn = false;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+    if (!Op.isCPol())
+      continue;
+    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+    break;
+  }
+
+  if (!IsAtomicReturn) {
+    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+    if (NewOpc != -1)
+      Inst.setOpcode(NewOpc);
+  }
+
+  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                   SIInstrFlags::IsAtomicRet;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+    // Add the register arguments
+    if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
+      if (IsAtomicReturn && i == 1)
+        Op.addRegOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle the case where soffset is an immediate
+    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      Op.addImmOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle tokens like 'offen' which are sometimes hard-coded into the
+    // asm string. There are no MCInst operands for these.
+    if (Op.isToken()) {
+      continue;
+    }
+    assert(Op.isImm());
+
+    // Handle optional arguments
+    OptionalIdx[Op.getImmTy()] = i;
+  }
+
+  if ((int)Inst.getNumOperands() <=
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+}
+
 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                       const OperandVector &Operands) {
   for (unsigned I = 1; I < Operands.size(); ++I) {
@@ -6747,17 +7297,14 @@ static bool ConvertOmodDiv(int64_t &Div) {
   return false;
 }
 
+// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
+// This is intentional and ensures compatibility with sp3.
+// See bug 35397 for details.
 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
-  if (BoundCtrl == 0) {
+  if (BoundCtrl == 0 || BoundCtrl == 1) {
     BoundCtrl = 1;
     return true;
   }
-
-  if (BoundCtrl == -1) {
-    BoundCtrl = 0;
-    return true;
-  }
-
   return false;
 }
 
@@ -6772,9 +7319,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
-  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
-  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
-  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
+  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -6808,6 +7353,18 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
 };
 
+void AMDGPUAsmParser::onBeginOfFile() {
+  if (!getParser().getStreamer().getTargetStreamer() ||
+      getSTI().getTargetTriple().getArch() == Triple::r600)
+    return;
+
+  if (!getTargetStreamer().getTargetID())
+    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
+
+  if (isHsaAbiVersion3Or4(&getSTI()))
+    getTargetStreamer().EmitDirectiveAMDGCNTarget();
+}
+
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
   OperandMatchResultTy res = parseOptionalOpr(Operands);
@@ -6857,6 +7414,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
                                         Op.ConvertResult);
     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
       res = parseDim(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+      res = parseCPol(Operands);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     }
@@ -7010,6 +7569,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
              Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
              Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
             Opc == AMDGPU::V_MAC_F16_e64_vi ||
+             Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
             Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
             Opc == AMDGPU::V_FMAC_F32_e64_vi ||
             Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
@@ -7028,16 +7588,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3(Inst, Operands, OptionalIdx);
 }
 
-void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
-                               const OperandVector &Operands) {
-  OptionalImmIndexMap OptIdx;
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+                               OptionalImmIndexMap &OptIdx) {
   const int Opc = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opc);
 
   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
 
-  cvtVOP3(Inst, Operands, OptIdx);
-
   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
     assert(!IsPacked);
     Inst.addOperand(Inst.getOperand(0));
   }
 
@@ -7046,7 +7603,10 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
   // instruction, and then figure out where to actually put the modifiers
-  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  if (OpSelIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  }
 
   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
   if (OpSelHiIdx != -1) {
@@ -7057,7 +7617,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
 
   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
   if (NegLoIdx != -1) {
-    assert(IsPacked);
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
   }
@@ -7069,16 +7628,16 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers };
 
-  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
-
-  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+  unsigned OpSel = 0;
   unsigned OpSelHi = 0;
   unsigned NegLo = 0;
   unsigned NegHi = 0;
 
-  if (OpSelHiIdx != -1) {
+  if (OpSelIdx != -1)
+    OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+  if (OpSelHiIdx != -1)
     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
-  }
 
   if (NegLoIdx != -1) {
     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
@@ -7111,6 +7670,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
   }
 }
 
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptIdx;
+  cvtVOP3(Inst, Operands, OptIdx);
+  cvtVOP3P(Inst, Operands, OptIdx);
+}
+
 //===----------------------------------------------------------------------===//
 // dpp
 //===----------------------------------------------------------------------===//
@@ -7167,44 +7732,64 @@ bool AMDGPUOperand::isU16Imm() const {
   return isImm() && isUInt<16>(getImm());
 }
 
-OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
-  if (!isGFX10Plus())
-    return MatchOperand_NoMatch;
-
-  SMLoc S = getLoc();
-
-  if (!trySkipId("dim", AsmToken::Colon))
-    return MatchOperand_NoMatch;
+//===----------------------------------------------------------------------===//
+// dim
+//===----------------------------------------------------------------------===//
 
-  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
-  // integer.
+bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
+  // We want to allow "dim:1D" etc.,
+  // but the initial 1 is tokenized as an integer.
   std::string Token;
   if (isToken(AsmToken::Integer)) {
     SMLoc Loc = getToken().getEndLoc();
     Token = std::string(getTokenStr());
     lex();
     if (getLoc() != Loc)
-      return MatchOperand_ParseFail;
+      return false;
   }
 
-  if (!isToken(AsmToken::Identifier))
-    return MatchOperand_ParseFail;
-  Token += getTokenStr();
+  StringRef Suffix;
+  if (!parseId(Suffix))
+    return false;
+  Token += Suffix;
 
   StringRef DimId = Token;
   if (DimId.startswith("SQ_RSRC_IMG_"))
-    DimId = DimId.substr(12);
+    DimId = DimId.drop_front(12);
 
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
   if (!DimInfo)
-    return MatchOperand_ParseFail;
+    return false;
+
+  Encoding = DimInfo->Encoding;
+  return true;
+}
 
-  lex();
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+  if (!isGFX10Plus())
+    return MatchOperand_NoMatch;
 
-  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+  SMLoc S = getLoc();
+
+  if (!trySkipId("dim", AsmToken::Colon))
+    return MatchOperand_NoMatch;
+
+  unsigned Encoding;
+  SMLoc Loc = getLoc();
+  if (!parseDimId(Encoding)) {
+    Error(Loc, "invalid dim value");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                               AMDGPUOperand::ImmTyDim));
 
   return MatchOperand_Success;
 }
 
+//===----------------------------------------------------------------------===//
+// dpp
+//===----------------------------------------------------------------------===//
+
 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
   SMLoc S = getLoc();
 
@@ -7245,6 +7830,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
 
 bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                          const OperandVector &Operands) {
+  if (Ctrl == "row_newbcast")
+    return isGFX90A();
+
   if (Ctrl == "row_share" || Ctrl == "row_xmask")
     return isGFX10Plus();
 
@@ -7322,6 +7910,7 @@ AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
           .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
           .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
           .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
+          .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
          .Default({-1, 0, 0});
 
  bool Valid;
@@ -7400,6 +7989,9 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
   OptionalImmIndexMap OptionalIdx;
+  unsigned Opc = Inst.getOpcode();
+  bool HasModifiers =
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
 
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -7426,7 +8018,8 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
     if (IsDPP8) {
       if (Op.isDPP8()) {
         Op.addImmOperands(Inst, 1);
-      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      } else if (HasModifiers &&
+                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
         Op.addRegWithFPInputModsOperands(Inst, 2);
       } else if (Op.isFI()) {
         Fi = Op.getImm();
@@ -7436,8 +8029,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
         llvm_unreachable("Invalid operand type");
       }
     } else {
-      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      if (HasModifiers &&
+          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
+      } else if (Op.isReg()) {
+        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
@@ -7691,8 +8287,6 @@ unsigned
```
AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isGDS() ? Match_Success : Match_InvalidOperand; case MCK_lds: return Operand.isLDS() ? Match_Success : Match_InvalidOperand; - case MCK_glc: - return Operand.isGLC() ? Match_Success : Match_InvalidOperand; case MCK_idxen: return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; case MCK_offen: |
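Note on the cache-policy hunks above: the separate dlc/glc/slc immediates are merged into a single ImmTyCPol operand (dispatched to the new parseCPol in parseOptionalOpr), so each converter now adds one optional immediate instead of up to three. A minimal sketch of the folding, not code from this patch; the single-bit flag values below mirror what AMDGPU::CPol appears to use and are restated here as assumptions:

```cpp
#include <string>
#include <vector>

// Assumed single-bit cache-policy flags; stand-ins for AMDGPU::CPol::GLC/SLC/DLC.
enum CPolFlag : unsigned { CPOL_GLC = 1, CPOL_SLC = 2, CPOL_DLC = 4 };

// Fold parsed modifier tokens ("glc", "slc", "dlc") into one cpol immediate,
// the way a single ImmTyCPol operand replaces the three removed operands.
unsigned foldCachePolicy(const std::vector<std::string> &Mods) {
  unsigned CPol = 0;
  for (const std::string &M : Mods) {
    if (M == "glc")
      CPol |= CPOL_GLC;
    else if (M == "slc")
      CPol |= CPOL_SLC;
    else if (M == "dlc")
      CPol |= CPOL_DLC;
  }
  return CPol; // e.g. {"glc", "dlc"} -> 5
}
```

The empty name in the optional-operand table ({"", ImmTyCPol, ...}) fits this: there is no single prefix to match, presumably because parseCPol recognizes the individual modifier tokens itself.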
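The new cvtSMEMAtomic selects between the returning and non-returning atomic encodings from the GLC bit of the parsed cpol, and for the returning form adds the first register operand twice (destination and data source). A reduced sketch of the opcode selection only; the opcode names and values are hypothetical stand-ins for the generated AMDGPU::getAtomicNoRetOp table:

```cpp
// Hypothetical opcode pair for one SMEM atomic (invented values).
enum Opcode : int { S_ATOMIC_ADD_RTN = 100, S_ATOMIC_ADD = 101 };

constexpr unsigned kGLC = 1; // assumed AMDGPU::CPol::GLC bit

// Mirror of the selection logic: without the GLC bit, swap the returning
// atomic for its no-return sibling.
int selectAtomicOpcode(int Opc, unsigned CPol) {
  const bool IsAtomicReturn = (CPol & kGLC) != 0;
  if (!IsAtomicReturn && Opc == S_ATOMIC_ADD_RTN)
    return S_ATOMIC_ADD; // plays the role of AMDGPU::getAtomicNoRetOp(Opc)
  return Opc;
}
```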
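ConvertBoundCtrl now treats bound_ctrl:0 and bound_ctrl:1 as synonyms that both encode as 1, for sp3 compatibility (bug 35397, per the added comment); the old -1 -> 0 case is dropped. A self-contained restatement of the new behavior:

```cpp
#include <cassert>
#include <cstdint>

// Restatement of the new ConvertBoundCtrl: both accepted values encode as 1.
static bool convertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1; // bound_ctrl:0 and bound_ctrl:1 share one encoding
    return true;
  }
  return false; // any other value is rejected by the parser
}

int main() {
  int64_t V0 = 0, V1 = 1, V2 = 2;
  assert(convertBoundCtrl(V0) && V0 == 1);
  assert(convertBoundCtrl(V1) && V1 == 1);
  assert(!convertBoundCtrl(V2)); // e.g. bound_ctrl:2 fails to parse
  return 0;
}
```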
