| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
Tags: vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5, vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e, vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1072 |
1 file changed, 833 insertions(+), 239 deletions(-)
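The bulk of this import replaces the separate dlc/glc/slc immediate operands with a single cache-policy (cpol) bitmask operand and threads gfx90a support through the parser. As a rough illustration of the merging scheme used by the new `parseCPol`/`CPolSeen` code in the diff below, here is a standalone sketch — not the LLVM sources; `CPolParser`, its `parse` method, and the simplified bit values are illustrative only, while the `CPol::GLC`-style names mirror the patch:

```cpp
// Minimal sketch of the cache-policy merging scheme this patch introduces:
// each modifier ("glc", "slc", "dlc", "scc", or its "no"-prefixed negation)
// toggles one bit in a single cpol immediate, and a "seen" mask rejects
// duplicates (including "glc" followed by "noglc", as in the patch).
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

namespace CPol {
enum : unsigned { GLC = 1u << 0, SLC = 1u << 1, DLC = 1u << 2, SCC = 1u << 3 };
}

struct CPolParser {
  unsigned Seen = 0; // reset per instruction, like CPolSeen in the patch

  // Returns the merged policy bits, or nullopt on a duplicate/unknown modifier.
  std::optional<unsigned> parse(const std::vector<std::string> &Mods) {
    unsigned Policy = 0;
    for (const std::string &Tok : Mods) {
      bool Negated = Tok.rfind("no", 0) == 0;      // "noglc" clears the bit
      std::string Name = Negated ? Tok.substr(2) : Tok;
      unsigned Bit = Name == "glc"   ? CPol::GLC
                     : Name == "slc" ? CPol::SLC
                     : Name == "dlc" ? CPol::DLC
                     : Name == "scc" ? CPol::SCC
                                     : 0;
      if (!Bit || (Seen & Bit))
        return std::nullopt;                       // unknown or duplicate
      Seen |= Bit;
      Policy = Negated ? (Policy & ~Bit) : (Policy | Bit);
    }
    return Policy;
  }
};

int main() {
  CPolParser P;
  if (auto Pol = P.parse({"glc", "slc"}))
    std::cout << "cpol bits: " << *Pol << "\n";    // prints 3 (GLC|SLC)
  CPolParser Q;
  if (!Q.parse({"glc", "noglc"}))
    std::cout << "duplicate policy rejected\n";
}
```

The design point this models: because all policy modifiers accumulate into one operand, later passes (e.g. the atomic return/no-return opcode selection in `cvtMubufImpl` and `cvtSMEMAtomic` below) can test a single immediate instead of three separate operands.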
```diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index af4a47935e3f..00032c7d4ea5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,6 +11,7 @@
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -113,9 +114,7 @@ public:
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
-    ImmTyDLC,
-    ImmTyGLC,
-    ImmTySLC,
+    ImmTyCPol,
     ImmTySWZ,
     ImmTyTFE,
     ImmTyD16,
@@ -299,6 +298,8 @@ public:
     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
   }
 
+  bool isVRegWithInputMods() const;
+
   bool isSDWAOperand(MVT type) const;
   bool isSDWAFP16Operand() const;
   bool isSDWAFP32Operand() const;
@@ -336,12 +337,7 @@ public:
   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
-  bool isDLC() const { return isImmTy(ImmTyDLC); }
-  bool isGLC() const { return isImmTy(ImmTyGLC); }
-  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
-  // value of the GLC operand.
-  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
-  bool isSLC() const { return isImmTy(ImmTySLC); }
+  bool isCPol() const { return isImmTy(ImmTyCPol); }
   bool isSWZ() const { return isImmTy(ImmTySWZ); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
   bool isD16() const { return isImmTy(ImmTyD16); }
@@ -449,6 +445,26 @@ public:
     return isSSrcF16();
   }
 
+  bool isSSrcV2FP32() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcF32();
+  }
+
+  bool isSCSrcV2FP32() const {
+    llvm_unreachable("cannot happen");
+    return isSCSrcF32();
+  }
+
+  bool isSSrcV2INT32() const {
+    llvm_unreachable("cannot happen");
+    return isSSrcB32();
+  }
+
+  bool isSCSrcV2INT32() const {
+    llvm_unreachable("cannot happen");
+    return isSCSrcB32();
+  }
+
   bool isSSrcOrLdsB32() const {
     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
            isLiteralImm(MVT::i32) || isExpr();
@@ -502,6 +518,22 @@ public:
     return isVSrcB16() || isLiteralImm(MVT::v2i16);
   }
 
+  bool isVCSrcV2FP32() const {
+    return isVCSrcF64();
+  }
+
+  bool isVSrcV2FP32() const {
+    return isVSrcF64() || isLiteralImm(MVT::v2f32);
+  }
+
+  bool isVCSrcV2INT32() const {
+    return isVCSrcB64();
+  }
+
+  bool isVSrcV2INT32() const {
+    return isVSrcB64() || isLiteralImm(MVT::v2i32);
+  }
+
   bool isVSrcF32() const {
     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
   }
@@ -542,6 +574,102 @@ public:
     return isVISrcF16() || isVISrcB32();
   }
 
+  bool isVISrc_64B64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
+  }
+
+  bool isVISrc_64F64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
+  }
+
+  bool isVISrc_64V2FP32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_64V2INT32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_256B64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
+  }
+
+  bool isVISrc_256F64() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
+  }
+
+  bool isVISrc_128B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_128V2B16() const {
+    return isVISrc_128B16();
+  }
+
+  bool isVISrc_128B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_128F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_256V2FP32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_256V2INT32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_512B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_512B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_512V2B16() const {
+    return isVISrc_512B16();
+  }
+
+  bool isVISrc_512F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_512F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_512V2F16() const {
+    return isVISrc_512F16() || isVISrc_512B32();
+  }
+
+  bool isVISrc_1024B32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
+  }
+
+  bool isVISrc_1024B16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
+  }
+
+  bool isVISrc_1024V2B16() const {
+    return isVISrc_1024B16();
+  }
+
+  bool isVISrc_1024F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
+  }
+
+  bool isVISrc_1024F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_1024V2F16() const {
+    return isVISrc_1024F16() || isVISrc_1024B32();
+  }
+
   bool isAISrcB32() const {
     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
   }
@@ -566,6 +694,14 @@ public:
     return isAISrcF16() || isAISrcB32();
   }
 
+  bool isAISrc_64B64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
+  }
+
+  bool isAISrc_64F64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
+  }
+
   bool isAISrc_128B32() const {
     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
   }
@@ -590,6 +726,22 @@ public:
     return isAISrc_128F16() || isAISrc_128B32();
   }
 
+  bool isVISrc_128F16() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
+  }
+
+  bool isVISrc_128V2F16() const {
+    return isVISrc_128F16() || isVISrc_128B32();
+  }
+
+  bool isAISrc_256B64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
+  }
+
+  bool isAISrc_256F64() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
+  }
+
   bool isAISrc_512B32() const {
     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
   }
@@ -837,9 +989,7 @@ public:
     case ImmTyInstOffset: OS << "InstOffset"; break;
     case ImmTyOffset0: OS << "Offset0"; break;
     case ImmTyOffset1: OS << "Offset1"; break;
-    case ImmTyDLC: OS << "DLC"; break;
-    case ImmTyGLC: OS << "GLC"; break;
-    case ImmTySLC: OS << "SLC"; break;
+    case ImmTyCPol: OS << "CPol"; break;
     case ImmTySWZ: OS << "SWZ"; break;
     case ImmTyTFE: OS << "TFE"; break;
     case ImmTyD16: OS << "D16"; break;
@@ -1021,6 +1171,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ForcedDPP = false;
   bool ForcedSDWA = false;
   KernelScopeInfo KernelScope;
+  unsigned CPolSeen;
 
   /// @name Auto-generated Match Functions
   /// {
@@ -1061,7 +1212,8 @@ private:
   bool ParseDirectiveHSACodeObjectISA();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
-  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+  // TODO: Possibly make subtargetHasRegister const.
+  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
   bool ParseDirectiveAMDGPUHsaKernel();
 
   bool ParseDirectiveISAVersion();
@@ -1105,7 +1257,7 @@ private:
   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                              unsigned RegWidth);
   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
-                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
+                    bool IsAtomic, bool IsLds = false);
   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                  bool IsGdsHardcoded);
@@ -1140,7 +1292,7 @@ public:
     // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
     MCContext &Ctx = getContext();
-    if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
       MCSymbol *Sym =
           Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
       Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1157,7 +1309,7 @@ public:
       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
       Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
     }
-    if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
+    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
       initializeGprCountSymbol(IS_VGPR);
       initializeGprCountSymbol(IS_SGPR);
     } else
@@ -1165,10 +1317,6 @@ public:
     }
   }
 
-  bool hasXNACK() const {
-    return AMDGPU::hasXNACK(getSTI());
-  }
-
   bool hasMIMG_R128() const {
     return AMDGPU::hasMIMG_R128(getSTI());
   }
@@ -1181,6 +1329,8 @@ public:
     return AMDGPU::hasGFX10A16(getSTI());
   }
 
+  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -1197,6 +1347,10 @@ public:
     return AMDGPU::isGFX9(getSTI());
   }
 
+  bool isGFX90A() const {
+    return AMDGPU::isGFX90A(getSTI());
+  }
+
   bool isGFX9Plus() const {
     return AMDGPU::isGFX9Plus(getSTI());
   }
@@ -1219,6 +1373,10 @@ public:
     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
   }
 
+  bool hasArchitectedFlatScratch() const {
+    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+  }
+
   bool hasSGPR102_SGPR103() const {
     return !isVI() && !isGFX9();
   }
@@ -1294,8 +1452,9 @@ public:
                  bool (*ConvertResult)(int64_t&) = nullptr);
 
   OperandMatchResultTy
-  parseNamedBit(const char *Name, OperandVector &Operands,
+  parseNamedBit(StringRef Name, OperandVector &Operands,
                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+  OperandMatchResultTy parseCPol(OperandVector &Operands);
   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                              StringRef &Value,
                                              SMLoc &StringLoc);
@@ -1379,14 +1538,19 @@ private:
   bool validateMIMGAddrSize(const MCInst &Inst);
   bool validateMIMGD16(const MCInst &Inst);
   bool validateMIMGDim(const MCInst &Inst);
-  bool validateLdsDirect(const MCInst &Inst);
+  bool validateMIMGMSAA(const MCInst &Inst);
   bool validateOpSel(const MCInst &Inst);
+  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
   bool validateVccOperand(unsigned Reg) const;
   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+  bool validateAGPRLdSt(const MCInst &Inst) const;
+  bool validateVGPRAlign(const MCInst &Inst) const;
+  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
   bool validateDivScale(const MCInst &Inst);
   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                              const SMLoc &IDLoc);
+  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
   unsigned getConstantBusLimit(unsigned Opcode) const;
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1403,6 +1567,7 @@ private:
   bool isId(const AsmToken &Token, const StringRef Id) const;
   bool isToken(const AsmToken::TokenKind Kind) const;
   bool trySkipId(const StringRef Id);
+  bool trySkipId(const StringRef Pref, const StringRef Id);
   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
   bool trySkipToken(const AsmToken::TokenKind Kind);
   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
@@ -1420,6 +1585,8 @@ private:
   void lex();
 
 public:
+  void onBeginOfFile() override;
+
   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
@@ -1451,16 +1618,12 @@ public:
   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
   int64_t parseGPRIdxMacro();
 
-  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
-  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
-  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
-  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
+  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
+  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
+  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
-  AMDGPUOperand::Ptr defaultDLC() const;
-  AMDGPUOperand::Ptr defaultGLC() const;
-  AMDGPUOperand::Ptr defaultGLC_1() const;
-  AMDGPUOperand::Ptr defaultSLC() const;
+  AMDGPUOperand::Ptr defaultCPol() const;
 
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -1474,6 +1637,8 @@ public:
   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+                OptionalImmIndexMap &OptionalIdx);
 
   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
 
@@ -1482,6 +1647,9 @@ public:
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
 
+  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
+
+  bool parseDimId(unsigned &Encoding);
   OperandMatchResultTy parseDim(OperandVector &Operands);
   OperandMatchResultTy parseDPP8(OperandVector &Operands);
   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
@@ -1551,11 +1719,16 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32:
     return &APFloat::IEEEsingle();
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     return &APFloat::IEEEdouble();
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
@@ -1715,7 +1888,8 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
   // literal goes into the lower half and the upper half is zero. We also
   // require that the literal may be losslesly converted to f16.
   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
-                     (type == MVT::v2i16)? MVT::i16 : type;
+                     (type == MVT::v2i16)? MVT::i16 :
+                     (type == MVT::v2f32)? MVT::f32 : type;
 
   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
@@ -1725,6 +1899,13 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
 }
 
+bool AMDGPUOperand::isVRegWithInputMods() const {
+  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
+         // GFX90A allows DPP on 64-bit operands.
+         (isRegClass(AMDGPU::VReg_64RegClassID) &&
+          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
+}
+
 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
   if (AsmParser->isVI())
     return isVReg32();
@@ -1751,8 +1932,9 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
 }
 
 bool AMDGPUOperand::isBoolReg() const {
-  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
-         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
+  auto FB = AsmParser->getFeatureBits();
+  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
+                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
 }
 
 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -1806,6 +1988,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1849,7 +2032,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
-  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32: {
     bool lost;
     APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
     // Convert literal to single precision
@@ -1881,6 +2068,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
     if (isSafeTruncation(Val, 32) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1897,6 +2088,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       setImmKindConst();
@@ -2000,6 +2192,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::VReg_128RegClassID;
     case 5: return AMDGPU::VReg_160RegClassID;
     case 6: return AMDGPU::VReg_192RegClassID;
+    case 7: return AMDGPU::VReg_224RegClassID;
     case 8: return AMDGPU::VReg_256RegClassID;
     case 16: return AMDGPU::VReg_512RegClassID;
     case 32: return AMDGPU::VReg_1024RegClassID;
@@ -2022,6 +2215,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::SGPR_128RegClassID;
     case 5: return AMDGPU::SGPR_160RegClassID;
     case 6: return AMDGPU::SGPR_192RegClassID;
+    case 7: return AMDGPU::SGPR_224RegClassID;
     case 8: return AMDGPU::SGPR_256RegClassID;
     case 16: return AMDGPU::SGPR_512RegClassID;
   }
@@ -2034,6 +2228,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
     case 4: return AMDGPU::AReg_128RegClassID;
     case 5: return AMDGPU::AReg_160RegClassID;
     case 6: return AMDGPU::AReg_192RegClassID;
+    case 7: return AMDGPU::AReg_224RegClassID;
     case 8: return AMDGPU::AReg_256RegClassID;
     case 16: return AMDGPU::AReg_512RegClassID;
     case 32: return AMDGPU::AReg_1024RegClassID;
@@ -2529,7 +2724,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
     return nullptr;
   }
-  if (isHsaAbiVersion3(&getSTI())) {
+  if (isHsaAbiVersion3Or4(&getSTI())) {
     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
       return nullptr;
   } else
@@ -3200,7 +3395,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
     return true;
 
   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
-  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
+  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
   if (DMask == 0)
     DMask = 1;
@@ -3230,6 +3425,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
 
   assert(VAddr0Idx != -1);
   assert(SrsrcIdx != -1);
@@ -3241,22 +3437,26 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
   unsigned Dim = Inst.getOperand(DimIdx).getImm();
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
-  unsigned VAddrSize =
+  unsigned ActualAddrSize =
       IsNSA ? SrsrcIdx - VAddr0Idx
             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
+
+  unsigned ExpectedAddrSize =
+      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
 
-  unsigned AddrSize = BaseOpcode->NumExtraArgs +
-                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
-                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
-                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);
   if (!IsNSA) {
-    if (AddrSize > 8)
-      AddrSize = 16;
-    else if (AddrSize > 4)
-      AddrSize = 8;
+    if (ExpectedAddrSize > 8)
+      ExpectedAddrSize = 16;
+
+    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
+    // This provides backward compatibility for assembly created
+    // before 160b/192b/224b types were directly supported.
+    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
+      return true;
   }
 
-  return VAddrSize == AddrSize;
+  return ActualAddrSize == ExpectedAddrSize;
 }
 
 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
@@ -3298,6 +3498,29 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
 }
 
+bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
+  if (!BaseOpcode->MSAA)
+    return true;
+
+  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  assert(DimIdx != -1);
+
+  unsigned Dim = Inst.getOperand(DimIdx).getImm();
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+
+  return DimInfo->MSAA;
+}
+
 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
 {
   switch (Opcode) {
@@ -3559,7 +3782,7 @@ static bool IsRevOpcode(const unsigned Opcode)
   }
 }
 
-bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
   using namespace SIInstrFlags;
   const unsigned Opcode = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opcode);
 
   // lds_direct register is defined so that it can be used
   // with 9-bit operands only. Ignore encodings which do not accept these.
-  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
-    return true;
+  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
+  if ((Desc.TSFlags & Enc) == 0)
+    return None;
 
-  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
-  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
-  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
+    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
+    if (SrcIdx == -1)
+      break;
+    const auto &Src = Inst.getOperand(SrcIdx);
+    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
 
-  const int SrcIndices[] = { Src1Idx, Src2Idx };
+      if (isGFX90A())
+        return StringRef("lds_direct is not supported on this GPU");
 
-  // lds_direct cannot be specified as either src1 or src2.
-  for (int SrcIdx : SrcIndices) {
-    if (SrcIdx == -1) break;
-    const MCOperand &Src = Inst.getOperand(SrcIdx);
-    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
-      return false;
+      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
+        return StringRef("lds_direct cannot be used with this instruction");
+
+      if (SrcName != OpName::src0)
+        return StringRef("lds_direct may be used as src0 only");
     }
   }
 
-  if (Src0Idx == -1)
-    return true;
-
-  const MCOperand &Src = Inst.getOperand(Src0Idx);
-  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
-    return true;
-
-  // lds_direct is specified as src0. Check additional limitations.
-  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
+  return None;
 }
 
 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
@@ -3624,7 +3843,7 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
 
   // For FLAT segment the offset must be positive;
   // MSB is ignored and forced to zero.
-  if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
+  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
     if (!isIntN(OffsetSize, Op.getImm())) {
       Error(getFlatOffsetLoc(Operands),
@@ -3733,6 +3952,28 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
   return true;
 }
 
+bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
+                                  const OperandVector &Operands) {
+  const unsigned Opc = Inst.getOpcode();
+  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
+  if (DppCtrlIdx < 0)
+    return true;
+  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+
+  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
+    // DPP64 is supported for row_newbcast only.
+    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+    if (Src0Idx >= 0 &&
+        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
+      Error(S, "64 bit dpp only supports row_newbcast");
+      return false;
+    }
+  }
+
+  return true;
+}
+
 // Check if VCC register matches wavefront size
 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
   auto FB = getFeatureBits();
@@ -3802,18 +4043,148 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
   return true;
 }
 
+// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
+static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
+                         const MCRegisterInfo *MRI) {
+  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
+  if (OpIdx < 0)
+    return -1;
+
+  const MCOperand &Op = Inst.getOperand(OpIdx);
+  if (!Op.isReg())
+    return -1;
+
+  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+  auto Reg = Sub ? Sub : Op.getReg();
+  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+  return AGPR32.contains(Reg) ? 1 : 0;
+}
+
+bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
+  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
+                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
+                  SIInstrFlags::DS)) == 0)
+    return true;
+
+  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
+                                                      : AMDGPU::OpName::vdata;
+
+  const MCRegisterInfo *MRI = getMRI();
+  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
+  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
+
+  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
+    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
+    if (Data2Areg >= 0 && Data2Areg != DataAreg)
+      return false;
+  }
+
+  auto FB = getFeatureBits();
+  if (FB[AMDGPU::FeatureGFX90AInsts]) {
+    if (DataAreg < 0 || DstAreg < 0)
+      return true;
+    return DstAreg == DataAreg;
+  }
+
+  return DstAreg < 1 && DataAreg < 1;
+}
+
+bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
+  auto FB = getFeatureBits();
+  if (!FB[AMDGPU::FeatureGFX90AInsts])
+    return true;
+
+  const MCRegisterInfo *MRI = getMRI();
+  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
+  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+    const MCOperand &Op = Inst.getOperand(I);
+    if (!Op.isReg())
+      continue;
+
+    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
+    if (!Sub)
+      continue;
+
+    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
+      return false;
+    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
+      return false;
+  }
+
+  return true;
+}
+
+// gfx90a has an undocumented limitation:
+// DS_GWS opcodes must use even aligned registers.
+bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
+                                  const OperandVector &Operands) {
+  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
+    return true;
+
+  int Opc = Inst.getOpcode();
+  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
+      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
+    return true;
+
+  const MCRegisterInfo *MRI = getMRI();
+  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
+  int Data0Pos =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
+  assert(Data0Pos != -1);
+  auto Reg = Inst.getOperand(Data0Pos).getReg();
+  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
+  if (RegIdx & 1) {
+    SMLoc RegLoc = getRegLoc(Reg, Operands);
+    Error(RegLoc, "vgpr must be even aligned");
+    return false;
+  }
+
+  return true;
+}
+
 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const SMLoc &IDLoc) {
-  int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
-                                          AMDGPU::OpName::glc1);
-  if (GLCPos != -1) {
-    // -1 is set by GLC_1 default operand. In all cases "glc" must be present
-    // in the asm string, and the default value means it is not present.
-    if (Inst.getOperand(GLCPos).getImm() == -1) {
+  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                           AMDGPU::OpName::cpol);
+  if (CPolPos == -1)
+    return true;
+
+  unsigned CPol = Inst.getOperand(CPolPos).getImm();
+
+  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & (SIInstrFlags::SMRD)) &&
+      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
+    Error(IDLoc, "invalid cache policy for SMRD instruction");
+    return false;
+  }
+
+  if (isGFX90A() && (CPol & CPol::SCC)) {
+    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+    StringRef CStr(S.getPointer());
+    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+    Error(S, "scc is not supported on this GPU");
+    return false;
+  }
+
+  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
+    return true;
+
+  if (TSFlags & SIInstrFlags::IsAtomicRet) {
+    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
       Error(IDLoc, "instruction must use glc");
       return false;
     }
+  } else {
+    if (CPol & CPol::GLC) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
+      Error(S, "instruction must not use glc");
+      return false;
+    }
   }
 
   return true;
@@ -3822,9 +4193,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                           const SMLoc &IDLoc,
                                           const OperandVector &Operands) {
-  if (!validateLdsDirect(Inst)) {
-    Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
-      "invalid use of lds_direct");
+  if (auto ErrMsg = validateLdsDirect(Inst)) {
+    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
     return false;
   }
   if (!validateSOPLiteral(Inst)) {
@@ -3851,6 +4221,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "invalid op_sel operand");
     return false;
   }
+  if (!validateDPP(Inst, Operands)) {
+    return false;
+  }
   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
   if (!validateMIMGD16(Inst)) {
     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
@@ -3861,6 +4234,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
     Error(IDLoc, "dim modifier is required on this GPU");
     return false;
   }
+  if (!validateMIMGMSAA(Inst)) {
+    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
+          "invalid dim; must be MSAA type");
+    return false;
+  }
   if (!validateMIMGDataSize(Inst)) {
     Error(IDLoc,
       "image data size does not match dmask and tfe");
@@ -3893,6 +4271,26 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateMAIAccWrite(Inst, Operands)) {
     return false;
   }
+  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
+    return false;
+  }
+
+  if (!validateAGPRLdSt(Inst)) {
+    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
+                     ? "invalid register class: data and dst should be all VGPR or AGPR"
+                     : "invalid register class: agpr loads and stores not supported on this GPU"
+    );
+    return false;
+  }
+  if (!validateVGPRAlign(Inst)) {
+    Error(IDLoc,
+      "invalid register class: vgpr tuples must be 64 bit aligned");
+    return false;
+  }
+  if (!validateGWS(Inst, Operands)) {
+    return false;
+  }
+
   if (!validateDivScale(Inst)) {
     Error(IDLoc, "ABS not allowed in VOP3B instructions");
     return false;
@@ -4062,21 +4460,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
     return TokError("directive only supported for amdgcn architecture");
 
-  std::string Target;
-
-  SMLoc TargetStart = getLoc();
-  if (getParser().parseEscapedString(Target))
+  std::string TargetIDDirective;
+  SMLoc TargetStart = getTok().getLoc();
+  if (getParser().parseEscapedString(TargetIDDirective))
     return true;
-  SMRange TargetRange = SMRange(TargetStart, getLoc());
 
-  std::string ExpectedTarget;
-  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
-  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+    return getParser().Error(TargetRange.Start,
+        (Twine(".amdgcn_target directive's target id ") +
+         Twine(TargetIDDirective) +
+         Twine(" does not match the specified target id ") +
+         Twine(getTargetStreamer().getTargetID()->toString())).str());
 
-  if (Target != ExpectedTargetOS.str())
-    return Error(TargetRange.Start, "target must match options", TargetRange);
-
-  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
   return false;
 }
 
@@ -4143,12 +4539,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
 
   SMRange VGPRRange;
   uint64_t NextFreeVGPR = 0;
+  uint64_t AccumOffset = 0;
   SMRange SGPRRange;
   uint64_t NextFreeSGPR = 0;
   unsigned UserSGPRCount = 0;
   bool ReserveVCC = true;
   bool ReserveFlatScr = true;
-  bool ReserveXNACK = hasXNACK();
   Optional<bool> EnableWavefrontSize32;
 
   while (true) {
@@ -4191,7 +4587,15 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
+    } else if (ID == ".amdhsa_kernarg_size") {
+      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+        return OutOfRangeError(ValRange);
+      KD.kernarg_size = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                        Val, ValRange);
@@ -4222,6 +4626,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (Val)
         UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
                        Val, ValRange);
@@ -4241,10 +4649,20 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                        Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
-      PARSE_BITS_ENTRY(
-          KD.compute_pgm_rsrc2,
-          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
-          ValRange);
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+    } else if (ID == ".amdhsa_enable_private_segment") {
+      if (!hasArchitectedFlatScratch())
+        return Error(
+            IDRange.Start,
+            "directive is not supported without architected flat scratch",
+            IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
@@ -4271,6 +4689,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_next_free_sgpr") {
       SGPRRange = ValRange;
       NextFreeSGPR = Val;
+    } else if (ID == ".amdhsa_accum_offset") {
+      if (!isGFX90A())
+        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      AccumOffset = Val;
     } else if (ID == ".amdhsa_reserve_vcc") {
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveVCC = Val;
     } else if (ID == ".amdhsa_reserve_flat_scratch") {
       if (IVersion.Major < 7)
         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
+      if (hasArchitectedFlatScratch())
+        return Error(IDRange.Start,
+                     "directive is not supported with architected flat scratch",
+                     IDRange);
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
       ReserveFlatScr = Val;
@@ -4286,7 +4712,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
       if (!isUInt<1>(Val))
         return OutOfRangeError(ValRange);
-      ReserveXNACK = Val;
+      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
+        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
+                                 IDRange);
     } else if (ID == ".amdhsa_float_round_mode_32") {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
@@ -4311,6 +4739,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                        ValRange);
+    } else if (ID == ".amdhsa_tg_split") {
+      if (!isGFX90A())
+        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
+                       ValRange);
     } else if (ID == ".amdhsa_workgroup_processor_mode") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4372,7 +4805,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   unsigned VGPRBlocks;
   unsigned SGPRBlocks;
   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
-                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
+                         EnableWavefrontSize32, NextFreeVGPR,
                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                          SGPRBlocks))
     return true;
@@ -4395,9 +4829,21 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                   UserSGPRCount);
 
+  if (isGFX90A()) {
+    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
+      return TokError(".amdhsa_accum_offset directive is required");
+    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
+      return TokError("accum_offset should be in range [4..256] in "
+                      "increments of 4");
+    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
+      return TokError("accum_offset exceeds total VGPR allocation");
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
+                    (AccumOffset / 4 - 1));
+  }
+
   getTargetStreamer().EmitAmdhsaKernelDescriptor(
       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
-      ReserveFlatScr, ReserveXNACK);
+      ReserveFlatScr);
   return false;
 }
 
@@ -4423,9 +4869,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   // targeted GPU.
   if (isToken(AsmToken::EndOfStatement)) {
     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
-    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
-                                                      ISA.Stepping,
-                                                      "AMD", "AMDGPU");
+    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
+                                                        ISA.Stepping,
+                                                        "AMD", "AMDGPU");
     return false;
   }
 
@@ -4450,8 +4896,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   if (!parseString(ArchName, "invalid arch name"))
     return true;
 
-  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
-                                                    VendorName, ArchName);
+  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
+                                                      VendorName, ArchName);
   return false;
 }
 
@@ -4560,19 +5006,11 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
                  "architectures");
   }
 
-  auto ISAVersionStringFromASM = getToken().getStringContents();
+  auto TargetIDDirective = getLexer().getTok().getStringContents();
+  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
+    return Error(getParser().getTok().getLoc(), "target id must match options");
 
-  std::string ISAVersionStringFromSTI;
-  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
-  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
-
-  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
-    return Error(getLoc(),
-                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
-                 "arguments specified through the command line");
-  }
-
-  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
+  getTargetStreamer().EmitISAVersion();
   Lex();
 
   return false;
@@ -4582,7 +5020,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
   const char *AssemblerDirectiveBegin;
   const char *AssemblerDirectiveEnd;
   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
-      isHsaAbiVersion3(&getSTI())
+      isHsaAbiVersion3Or4(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);
@@ -4599,7 +5037,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
                           HSAMetadataString))
     return true;
 
-  if (isHsaAbiVersion3(&getSTI())) {
+  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
@@ -4749,12 +5187,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
-  if (isHsaAbiVersion3(&getSTI())) {
-    if (IDVal == ".amdgcn_target")
-      return ParseDirectiveAMDGCNTarget();
-
+  if (isHsaAbiVersion3Or4(&getSTI())) {
     if (IDVal == ".amdhsa_kernel")
-     return ParseDirectiveAMDHSAKernel();
+      return ParseDirectiveAMDHSAKernel();
 
     // TODO: Restructure/combine with PAL metadata directive.
     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
@@ -4779,6 +5214,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
       return ParseDirectiveHSAMetadata();
   }
 
+  if (IDVal == ".amdgcn_target")
+    return ParseDirectiveAMDGCNTarget();
+
   if (IDVal == ".amdgpu_lds")
     return ParseDirectiveAMDGPULDS();
 
@@ -4792,7 +5230,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
 }
 
 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
-                                           unsigned RegNo) const {
+                                           unsigned RegNo) {
 
   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
        R.isValid(); ++R) {
@@ -4824,7 +5262,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   case AMDGPU::XNACK_MASK:
   case AMDGPU::XNACK_MASK_LO:
   case AMDGPU::XNACK_MASK_HI:
-    return (isVI() || isGFX9()) && hasXNACK();
+    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
   case AMDGPU::SGPR_NULL:
     return isGFX10Plus();
   default:
@@ -4881,16 +5319,21 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
 
     unsigned Prefix = Operands.size();
     for (;;) {
+      auto Loc = getLoc();
      ResTy = parseReg(Operands);
+      if (ResTy == MatchOperand_NoMatch)
+        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
-        return ResTy;
+        return MatchOperand_ParseFail;
 
      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;
 
-      if (!trySkipToken(AsmToken::Comma))
+      if (!skipToken(AsmToken::Comma,
+                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
+      }
    }
 
    if (Operands.size() - Prefix > 1) {
@@ -4940,11 +5383,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     OperandMode Mode = OperandMode_Default;
     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
       Mode = OperandMode_NSA;
+    CPolSeen = 0;
     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
 
-    // Eat the comma or space if there is one.
-    trySkipToken(AsmToken::Comma);
-
     if (Res != MatchOperand_Success) {
       checkUnsupportedInstruction(Name, NameLoc);
       if (!Parser.hasPendingError()) {
@@ -4959,6 +5400,9 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
       }
       return true;
     }
+
+    // Eat the comma or space if there is one.
+    trySkipToken(AsmToken::Comma);
   }
 
   return false;
@@ -5043,39 +5487,27 @@ AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                                AMDGPUOperand::ImmTy ImmTy) {
-  int64_t Bit = 0;
+  int64_t Bit;
   SMLoc S = getLoc();
 
-  // We are at the end of the statement, and this is a default argument, so
-  // use a default value.
-  if (!isToken(AsmToken::EndOfStatement)) {
-    switch(getTokenKind()) {
-      case AsmToken::Identifier: {
-        StringRef Tok = getTokenStr();
-        if (Tok == Name) {
-          if (Tok == "r128" && !hasMIMG_R128())
-            Error(S, "r128 modifier is not supported on this GPU");
-          if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
-            Error(S, "a16 modifier is not supported on this GPU");
-          Bit = 1;
-          Parser.Lex();
-        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
-          Bit = 0;
-          Parser.Lex();
-        } else {
-          return MatchOperand_NoMatch;
-        }
-        break;
-      }
-      default:
-        return MatchOperand_NoMatch;
-    }
+  if (trySkipId(Name)) {
+    Bit = 1;
+  } else if (trySkipId("no", Name)) {
+    Bit = 0;
+  } else {
+    return MatchOperand_NoMatch;
   }
 
-  if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
+  if (Name == "r128" && !hasMIMG_R128()) {
+    Error(S, "r128 modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
+    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
+  }
 
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;
@@ -5084,6 +5516,62 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
  return MatchOperand_Success;
 }
 
+OperandMatchResultTy
+AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+  unsigned CPolOn = 0;
+  unsigned CPolOff = 0;
+  SMLoc S = getLoc();
+
+  if (trySkipId("glc"))
+    CPolOn = AMDGPU::CPol::GLC;
+  else if (trySkipId("noglc"))
+    CPolOff = AMDGPU::CPol::GLC;
+  else if (trySkipId("slc"))
+    CPolOn = AMDGPU::CPol::SLC;
+  else if (trySkipId("noslc"))
+    CPolOff = AMDGPU::CPol::SLC;
+  else if (trySkipId("dlc"))
+    CPolOn = AMDGPU::CPol::DLC;
+  else if (trySkipId("nodlc"))
+    CPolOff = AMDGPU::CPol::DLC;
+  else if (trySkipId("scc"))
+    CPolOn = AMDGPU::CPol::SCC;
+  else if (trySkipId("noscc"))
+    CPolOff = AMDGPU::CPol::SCC;
+  else
+    return MatchOperand_NoMatch;
+
+  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
+    Error(S, "dlc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
+    Error(S, "scc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (CPolSeen & (CPolOn | CPolOff)) {
+    Error(S, "duplicate cache policy modifier");
+    return MatchOperand_ParseFail;
+  }
+
+  CPolSeen |= (CPolOn | CPolOff);
+
+  for (unsigned I = 1; I != Operands.size(); ++I) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+    if (Op.isCPol()) {
+      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
+      return MatchOperand_Success;
+    }
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
+                                              AMDGPUOperand::ImmTyCPol));
+
+  return MatchOperand_Success;
+}
+
 static void addOptionalImmOperand(
   MCInst& Inst, const OperandVector& Operands,
   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@@ -5757,7 +6245,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
     }
     return false;
   }
-  if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
@@ -5765,7 +6253,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
-  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
@@ -5934,6 +6422,18 @@ AMDGPUAsmParser::trySkipId(const StringRef Id) {
 }
 
 bool
+AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
+  if (isToken(AsmToken::Identifier)) {
+    StringRef Tok = getTokenStr();
+    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
+      lex();
+      return true;
+    }
+  }
+  return false;
+}
+
+bool
 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
   if (isId(Id) && peekToken().is(Kind)) {
     lex();
@@ -6489,32 +6989,38 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
 // mubuf
 //===----------------------------------------------------------------------===//
 
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
-  return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
 }
 
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
-                               const OperandVector &Operands,
-                               bool IsAtomic,
-                               bool IsAtomicReturn,
-                               bool IsLds) {
+                                   const OperandVector &Operands,
+                                   bool IsAtomic,
+                                   bool IsLds) {
   bool IsLdsOpcode = IsLds;
   bool HasLdsModifier = false;
   OptionalImmIndexMap OptionalIdx;
-  assert(IsAtomicReturn ? IsAtomic : true);
   unsigned FirstOperandIdx = 1;
+  bool IsAtomicReturn = false;
+
+  if (IsAtomic) {
+    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+      if (!Op.isCPol())
+        continue;
+      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+      break;
+    }
+
+    if (!IsAtomicReturn) {
+      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+      if (NewOpc != -1)
+        Inst.setOpcode(NewOpc);
+    }
+
+    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                     SIInstrFlags::IsAtomicRet;
+  }
 
   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -6565,18 +7071,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   }
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
-  if (!IsAtomic || IsAtomicReturn) {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
-                          IsAtomicReturn ? -1 : 0);
-  }
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
 
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
-
-  if (isGFX10Plus())
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -6611,12 +7111,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
   addOptionalImmOperand(Inst, Operands, OptionalIdx,
                         AMDGPUOperand::ImmTyOffset);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
-
-  if (isGFX10Plus())
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
 }
 
 //===----------------------------------------------------------------------===//
@@ -6658,14 +7155,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   if (IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
-  if (IsGFX10Plus)
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
   if (IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   if (!IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -6676,6 +7171,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
   cvtMIMG(Inst, Operands, true);
 }
 
+void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  bool IsAtomicReturn = false;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+    if (!Op.isCPol())
+      continue;
+    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+    break;
+  }
+
+  if (!IsAtomicReturn) {
+    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+    if (NewOpc != -1)
+      Inst.setOpcode(NewOpc);
+  }
+
+  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                   SIInstrFlags::IsAtomicRet;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+    // Add the register arguments
+    if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
+      if (IsAtomicReturn && i == 1)
+        Op.addRegOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle the case where soffset is an immediate
+    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      Op.addImmOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle tokens like 'offen' which are sometimes hard-coded into the
+    // asm string. There are no MCInst operands for these.
+    if (Op.isToken()) {
+      continue;
+    }
+    assert(Op.isImm());
+
+    // Handle optional arguments
+    OptionalIdx[Op.getImmTy()] = i;
+  }
+
+  if ((int)Inst.getNumOperands() <=
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+}
+
 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                       const OperandVector &Operands) {
   for (unsigned I = 1; I < Operands.size(); ++I) {
@@ -6747,17 +7297,14 @@ static bool ConvertOmodDiv(int64_t &Div) {
   return false;
 }
 
+// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
+// This is intentional and ensures compatibility with sp3.
+// See bug 35397 for details.
 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
-  if (BoundCtrl == 0) {
+  if (BoundCtrl == 0 || BoundCtrl == 1) {
     BoundCtrl = 1;
     return true;
   }
-
-  if (BoundCtrl == -1) {
-    BoundCtrl = 0;
-    return true;
-  }
-
   return false;
 }
 
@@ -6772,9 +7319,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
-  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
-  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
-  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
+  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -6808,6 +7353,18 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
 };
 
+void AMDGPUAsmParser::onBeginOfFile() {
+  if (!getParser().getStreamer().getTargetStreamer() ||
+      getSTI().getTargetTriple().getArch() == Triple::r600)
+    return;
+
+  if (!getTargetStreamer().getTargetID())
+    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
+
+  if (isHsaAbiVersion3Or4(&getSTI()))
+    getTargetStreamer().EmitDirectiveAMDGCNTarget();
+}
+
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
   OperandMatchResultTy res = parseOptionalOpr(Operands);
@@ -6857,6 +7414,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
                                         Op.ConvertResult);
     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
       res = parseDim(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+      res = parseCPol(Operands);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     }
@@ -7010,6 +7569,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
              Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
              Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
             Opc == AMDGPU::V_MAC_F16_e64_vi ||
+             Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
             Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
             Opc == AMDGPU::V_FMAC_F32_e64_vi ||
             Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
@@ -7028,16 +7588,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3(Inst, Operands, OptionalIdx);
 }
 
-void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
-                               const OperandVector &Operands) {
-  OptionalImmIndexMap OptIdx;
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
+                               OptionalImmIndexMap &OptIdx) {
   const int Opc = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opc);
 
   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
 
-  cvtVOP3(Inst, Operands, OptIdx);
-
   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
     assert(!IsPacked);
     Inst.addOperand(Inst.getOperand(0));
   }
 
@@ -7046,7 +7603,10 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
   // instruction, and then figure out where to actually put the modifiers
-  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+  if (OpSelIdx != -1) {
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+  }
 
   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
   if (OpSelHiIdx != -1) {
@@ -7057,7 +7617,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
 
   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
   if (NegLoIdx != -1) {
-    assert(IsPacked);
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
   }
@@ -7069,16 +7628,16 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers };
 
-  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
-
-  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+  unsigned OpSel = 0;
   unsigned OpSelHi = 0;
   unsigned NegLo = 0;
   unsigned NegHi = 0;
 
-  if (OpSelHiIdx != -1) {
+  if (OpSelIdx != -1)
+    OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+  if (OpSelHiIdx != -1)
     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
-  }
 
   if (NegLoIdx != -1) {
     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
@@ -7111,6 +7670,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
   }
 }
 
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptIdx;
+  cvtVOP3(Inst, Operands, OptIdx);
+  cvtVOP3P(Inst, Operands, OptIdx);
+}
+
 //===----------------------------------------------------------------------===//
 // dpp
 //===----------------------------------------------------------------------===//
@@ -7167,44 +7732,64 @@ bool AMDGPUOperand::isU16Imm() const {
   return isImm() && isUInt<16>(getImm());
 }
 
-OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
-  if (!isGFX10Plus())
-    return MatchOperand_NoMatch;
-
-  SMLoc S = getLoc();
-
-  if (!trySkipId("dim", AsmToken::Colon))
-    return MatchOperand_NoMatch;
+//===----------------------------------------------------------------------===//
+// dim
+//===----------------------------------------------------------------------===//
 
-  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
-  // integer.
+bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
+  // We want to allow "dim:1D" etc.,
+  // but the initial 1 is tokenized as an integer.
   std::string Token;
   if (isToken(AsmToken::Integer)) {
     SMLoc Loc = getToken().getEndLoc();
     Token = std::string(getTokenStr());
     lex();
     if (getLoc() != Loc)
-      return MatchOperand_ParseFail;
+      return false;
   }
 
-  if (!isToken(AsmToken::Identifier))
-    return MatchOperand_ParseFail;
-  Token += getTokenStr();
+  StringRef Suffix;
+  if (!parseId(Suffix))
+    return false;
+  Token += Suffix;
 
   StringRef DimId = Token;
   if (DimId.startswith("SQ_RSRC_IMG_"))
-    DimId = DimId.substr(12);
+    DimId = DimId.drop_front(12);
 
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
   if (!DimInfo)
-    return MatchOperand_ParseFail;
+    return false;
+
+  Encoding = DimInfo->Encoding;
+  return true;
+}
 
-  lex();
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+  if (!isGFX10Plus())
+    return MatchOperand_NoMatch;
 
-  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+  SMLoc S = getLoc();
+
+  if (!trySkipId("dim", AsmToken::Colon))
+    return MatchOperand_NoMatch;
+
+  unsigned Encoding;
+  SMLoc Loc = getLoc();
+  if (!parseDimId(Encoding)) {
+    Error(Loc, "invalid dim value");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                               AMDGPUOperand::ImmTyDim));
 
   return MatchOperand_Success;
 }
 
+//===----------------------------------------------------------------------===//
+// dpp
+//===----------------------------------------------------------------------===//
+
 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
   SMLoc S = getLoc();
 
@@ -7245,6 +7830,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
 
 bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                          const OperandVector &Operands) {
+  if (Ctrl == "row_newbcast")
+    return isGFX90A();
+
   if (Ctrl == "row_share" || Ctrl == "row_xmask")
     return isGFX10Plus();
 
@@ -7322,6 +7910,7 @@ AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
           .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
           .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
           .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
+          .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
          .Default({-1, 0, 0});
 
  bool Valid;
@@ -7400,6 +7989,9 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
   OptionalImmIndexMap OptionalIdx;
+  unsigned Opc = Inst.getOpcode();
+  bool HasModifiers =
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
 
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -7426,7 +8018,8 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
     if (IsDPP8) {
       if (Op.isDPP8()) {
         Op.addImmOperands(Inst, 1);
-      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      } else if (HasModifiers &&
+                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
         Op.addRegWithFPInputModsOperands(Inst, 2);
       } else if (Op.isFI()) {
         Fi = Op.getImm();
@@ -7436,8 +8029,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
         llvm_unreachable("Invalid operand type");
       }
     } else {
-      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      if (HasModifiers &&
+          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
+      } else if (Op.isReg()) {
+        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
@@ -7691,8 +8287,6 @@ unsigned
```
AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isGDS() ? Match_Success : Match_InvalidOperand; case MCK_lds: return Operand.isLDS() ? Match_Success : Match_InvalidOperand; - case MCK_glc: - return Operand.isGLC() ? Match_Success : Match_InvalidOperand; case MCK_idxen: return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; case MCK_offen: |
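Note on the cache-policy hunks above: the separate dlc/glc/slc immediates are merged into a single ImmTyCPol operand (dispatched to the new parseCPol in parseOptionalOpr), so each converter now adds one optional immediate instead of up to three. A minimal sketch of the folding, not code from this patch; the single-bit flag values below mirror what AMDGPU::CPol appears to use and are restated here as assumptions:

```cpp
#include <string>
#include <vector>

// Assumed single-bit cache-policy flags; stand-ins for AMDGPU::CPol::GLC/SLC/DLC.
enum CPolFlag : unsigned { CPOL_GLC = 1, CPOL_SLC = 2, CPOL_DLC = 4 };

// Fold parsed modifier tokens ("glc", "slc", "dlc") into one cpol immediate,
// the way a single ImmTyCPol operand replaces the three removed operands.
unsigned foldCachePolicy(const std::vector<std::string> &Mods) {
  unsigned CPol = 0;
  for (const std::string &M : Mods) {
    if (M == "glc")
      CPol |= CPOL_GLC;
    else if (M == "slc")
      CPol |= CPOL_SLC;
    else if (M == "dlc")
      CPol |= CPOL_DLC;
  }
  return CPol; // e.g. {"glc", "dlc"} -> 5
}
```

The empty name in the optional-operand table ({"", ImmTyCPol, ...}) fits this: there is no single prefix to match, presumably because parseCPol recognizes the individual modifier tokens itself.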
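The new cvtSMEMAtomic selects between the returning and non-returning atomic encodings from the GLC bit of the parsed cpol, and for the returning form adds the first register operand twice (destination and data source). A reduced sketch of the opcode selection only; the opcode names and values are hypothetical stand-ins for the generated AMDGPU::getAtomicNoRetOp table:

```cpp
// Hypothetical opcode pair for one SMEM atomic (invented values).
enum Opcode : int { S_ATOMIC_ADD_RTN = 100, S_ATOMIC_ADD = 101 };

constexpr unsigned kGLC = 1; // assumed AMDGPU::CPol::GLC bit

// Mirror of the selection logic: without the GLC bit, swap the returning
// atomic for its no-return sibling.
int selectAtomicOpcode(int Opc, unsigned CPol) {
  const bool IsAtomicReturn = (CPol & kGLC) != 0;
  if (!IsAtomicReturn && Opc == S_ATOMIC_ADD_RTN)
    return S_ATOMIC_ADD; // plays the role of AMDGPU::getAtomicNoRetOp(Opc)
  return Opc;
}
```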
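ConvertBoundCtrl now treats bound_ctrl:0 and bound_ctrl:1 as synonyms that both encode as 1, for sp3 compatibility (bug 35397, per the added comment); the old -1 -> 0 case is dropped. A self-contained restatement of the new behavior:

```cpp
#include <cassert>
#include <cstdint>

// Restatement of the new ConvertBoundCtrl: both accepted values encode as 1.
static bool convertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1; // bound_ctrl:0 and bound_ctrl:1 share one encoding
    return true;
  }
  return false; // any other value is rejected by the parser
}

int main() {
  int64_t V0 = 0, V1 = 1, V2 = 2;
  assert(convertBoundCtrl(V0) && V0 == 1);
  assert(convertBoundCtrl(V1) && V1 == 1);
  assert(!convertBoundCtrl(V2)); // e.g. bound_ctrl:2 fails to parse
  return 0;
}
```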
