diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 1146 |
1 files changed, 966 insertions, 180 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index ffe626513d47..e12d0ffef35c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -20,10 +20,13 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" @@ -33,6 +36,7 @@ #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetParser.h" using namespace llvm; @@ -120,12 +124,6 @@ public: ImmTyD16, ImmTyClampSI, ImmTyOModSI, - ImmTyDPP8, - ImmTyDppCtrl, - ImmTyDppRowMask, - ImmTyDppBankMask, - ImmTyDppBoundCtrl, - ImmTyDppFi, ImmTySdwaDstSel, ImmTySdwaSrc0Sel, ImmTySdwaSrc1Sel, @@ -151,6 +149,12 @@ public: ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi, + ImmTyDPP8, + ImmTyDppCtrl, + ImmTyDppRowMask, + ImmTyDppBankMask, + ImmTyDppBoundCtrl, + ImmTyDppFi, ImmTySwizzle, ImmTyGprIdxMode, ImmTyHigh, @@ -158,6 +162,8 @@ public: ImmTyCBSZ, ImmTyABID, ImmTyEndpgm, + ImmTyWaitVDST, + ImmTyWaitEXP, }; enum ImmKindTy { @@ -262,6 +268,14 @@ public: return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); } + bool isRegOrInlineImmWithInt16InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); + } + + bool isRegOrInlineImmWithInt32InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); + } + bool isRegOrImmWithInt64InputMods() const { return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, 
MVT::i64); } @@ -278,6 +292,15 @@ public: return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); } + bool isRegOrInlineImmWithFP16InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); + } + + bool isRegOrInlineImmWithFP32InputMods() const { + return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); + } + + bool isVReg() const { return isRegClass(AMDGPU::VGPR_32RegClassID) || isRegClass(AMDGPU::VReg_64RegClassID) || @@ -815,6 +838,8 @@ public: } bool isSWaitCnt() const; + bool isDepCtr() const; + bool isSDelayAlu() const; bool isHwreg() const; bool isSendMsg() const; bool isSwizzle() const; @@ -830,6 +855,8 @@ public: bool isS16Imm() const; bool isU16Imm() const; bool isEndpgm() const; + bool isWaitVDST() const; + bool isWaitEXP() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -1037,6 +1064,8 @@ public: case ImmTyCBSZ: OS << "CBSZ"; break; case ImmTyABID: OS << "ABID"; break; case ImmTyEndpgm: OS << "Endpgm"; break; + case ImmTyWaitVDST: OS << "WaitVDST"; break; + case ImmTyWaitEXP: OS << "WaitEXP"; break; } } @@ -1123,7 +1152,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { class KernelScopeInfo { int SgprIndexUnusedMin = -1; int VgprIndexUnusedMin = -1; + int AgprIndexUnusedMin = -1; MCContext *Ctx = nullptr; + MCSubtargetInfo const *MSTI = nullptr; void usesSgprAt(int i) { if (i >= SgprIndexUnusedMin) { @@ -1142,7 +1173,31 @@ class KernelScopeInfo { if (Ctx) { MCSymbol* const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); - Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); + int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, + VgprIndexUnusedMin); + Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); + } + } + } + + void usesAgprAt(int i) { + // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction + if (!hasMAIInsts(*MSTI)) + return; + + if (i >= AgprIndexUnusedMin) { + AgprIndexUnusedMin = ++i; 
+ if (Ctx) { + MCSymbol* const Sym = + Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); + Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); + + // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) + MCSymbol* const vSym = + Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); + int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, + VgprIndexUnusedMin); + vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); } } } @@ -1152,16 +1207,29 @@ public: void initialize(MCContext &Context) { Ctx = &Context; + MSTI = Ctx->getSubtargetInfo(); + usesSgprAt(SgprIndexUnusedMin = -1); usesVgprAt(VgprIndexUnusedMin = -1); + if (hasMAIInsts(*MSTI)) { + usesAgprAt(AgprIndexUnusedMin = -1); + } } - void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { + void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, + unsigned RegWidth) { switch (RegKind) { - case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; - case IS_AGPR: // fall through - case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; - default: break; + case IS_SGPR: + usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + case IS_AGPR: + usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + case IS_VGPR: + usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); + break; + default: + break; } } }; @@ -1353,10 +1421,15 @@ public: return AMDGPU::isGFX9(getSTI()); } + // TODO: isGFX90A is also true for GFX940. We need to clean it. 
bool isGFX90A() const { return AMDGPU::isGFX90A(getSTI()); } + bool isGFX940() const { + return AMDGPU::isGFX940(getSTI()); + } + bool isGFX9Plus() const { return AMDGPU::isGFX9Plus(getSTI()); } @@ -1367,6 +1440,14 @@ public: bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } + bool isGFX11() const { + return AMDGPU::isGFX11(getSTI()); + } + + bool isGFX11Plus() const { + return AMDGPU::isGFX11Plus(getSTI()); + } + bool isGFX10_BEncoding() const { return AMDGPU::isGFX10_BEncoding(getSTI()); } @@ -1496,6 +1577,14 @@ public: bool parseCnt(int64_t &IntVal); OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + + bool parseDepCtr(int64_t &IntVal, unsigned &Mask); + void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); + OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); + + bool parseDelay(int64_t &Delay); + OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); + OperandMatchResultTy parseHwreg(OperandVector &Operands); private: @@ -1522,6 +1611,7 @@ private: SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; + SMLoc getBLGPLoc(const OperandVector &Operands) const; SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, const OperandVector &Operands) const; @@ -1540,7 +1630,7 @@ private: bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); - bool validateMIMGDataSize(const MCInst &Inst); + Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); bool validateMIMGAddrSize(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); bool validateMIMGDim(const MCInst &Inst); @@ -1553,10 +1643,14 @@ private: bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); bool validateAGPRLdSt(const MCInst &Inst) const; bool validateVGPRAlign(const MCInst &Inst) const; + bool validateBLGP(const MCInst 
&Inst, const OperandVector &Operands); bool validateGWS(const MCInst &Inst, const OperandVector &Operands); bool validateDivScale(const MCInst &Inst); bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc); + bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, + const SMLoc &IDLoc); + bool validateExeczVcczOperands(const OperandVector &Operands); Optional<StringRef> validateLdsDirect(const MCInst &Inst); unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); @@ -1586,7 +1680,7 @@ private: bool parseExpr(int64_t &Imm, StringRef Expected = ""); bool parseExpr(OperandVector &Operands); StringRef getTokenStr() const; - AsmToken peekToken(); + AsmToken peekToken(bool ShouldSkipSpace = true); AsmToken getToken() const; SMLoc getLoc() const; void lex(); @@ -1644,10 +1738,12 @@ public: void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); + void cvtVOPD(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); + void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); void cvtMIMG(MCInst &Inst, const OperandVector &Operands, bool IsAtomic = false); @@ -1668,7 +1764,24 @@ public: AMDGPUOperand::Ptr defaultBoundCtrl() const; AMDGPUOperand::Ptr defaultFI() const; void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); - void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } + void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { + cvtDPP(Inst, Operands, true); + } + void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector 
&Operands) { + cvtVOPCNoDstDPP(Inst, Operands, true); + } + void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { + cvtVOP3DPP(Inst, Operands, true); + } + void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, + bool IsDPP8 = false); + void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { + cvtVOPC64NoDstDPP(Inst, Operands, true); + } OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, AMDGPUOperand::ImmTy Type); @@ -1689,6 +1802,10 @@ public: OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; + + AMDGPUOperand::Ptr defaultWaitVDST() const; + AMDGPUOperand::Ptr defaultWaitEXP() const; + OperandMatchResultTy parseVOPD(OperandVector &Operands); }; struct OptionalOperand { @@ -1897,7 +2014,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { // We allow fp literals with f16x2 operands assuming that the specified // literal goes into the lower half and the upper half is zero. We also - // require that the literal may be losslesly converted to f16. + // require that the literal may be losslessly converted to f16. MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : (type == MVT::v2i16)? MVT::i16 : (type == MVT::v2f32)? 
MVT::f32 : type; @@ -2211,52 +2328,86 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { if (Is == IS_VGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::VGPR_32RegClassID; - case 2: return AMDGPU::VReg_64RegClassID; - case 3: return AMDGPU::VReg_96RegClassID; - case 4: return AMDGPU::VReg_128RegClassID; - case 5: return AMDGPU::VReg_160RegClassID; - case 6: return AMDGPU::VReg_192RegClassID; - case 7: return AMDGPU::VReg_224RegClassID; - case 8: return AMDGPU::VReg_256RegClassID; - case 16: return AMDGPU::VReg_512RegClassID; - case 32: return AMDGPU::VReg_1024RegClassID; + case 32: + return AMDGPU::VGPR_32RegClassID; + case 64: + return AMDGPU::VReg_64RegClassID; + case 96: + return AMDGPU::VReg_96RegClassID; + case 128: + return AMDGPU::VReg_128RegClassID; + case 160: + return AMDGPU::VReg_160RegClassID; + case 192: + return AMDGPU::VReg_192RegClassID; + case 224: + return AMDGPU::VReg_224RegClassID; + case 256: + return AMDGPU::VReg_256RegClassID; + case 512: + return AMDGPU::VReg_512RegClassID; + case 1024: + return AMDGPU::VReg_1024RegClassID; } } else if (Is == IS_TTMP) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::TTMP_32RegClassID; - case 2: return AMDGPU::TTMP_64RegClassID; - case 4: return AMDGPU::TTMP_128RegClassID; - case 8: return AMDGPU::TTMP_256RegClassID; - case 16: return AMDGPU::TTMP_512RegClassID; + case 32: + return AMDGPU::TTMP_32RegClassID; + case 64: + return AMDGPU::TTMP_64RegClassID; + case 128: + return AMDGPU::TTMP_128RegClassID; + case 256: + return AMDGPU::TTMP_256RegClassID; + case 512: + return AMDGPU::TTMP_512RegClassID; } } else if (Is == IS_SGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::SGPR_32RegClassID; - case 2: return AMDGPU::SGPR_64RegClassID; - case 3: return AMDGPU::SGPR_96RegClassID; - case 4: return AMDGPU::SGPR_128RegClassID; - case 5: return AMDGPU::SGPR_160RegClassID; - case 6: return AMDGPU::SGPR_192RegClassID; - case 7: return 
AMDGPU::SGPR_224RegClassID; - case 8: return AMDGPU::SGPR_256RegClassID; - case 16: return AMDGPU::SGPR_512RegClassID; + case 32: + return AMDGPU::SGPR_32RegClassID; + case 64: + return AMDGPU::SGPR_64RegClassID; + case 96: + return AMDGPU::SGPR_96RegClassID; + case 128: + return AMDGPU::SGPR_128RegClassID; + case 160: + return AMDGPU::SGPR_160RegClassID; + case 192: + return AMDGPU::SGPR_192RegClassID; + case 224: + return AMDGPU::SGPR_224RegClassID; + case 256: + return AMDGPU::SGPR_256RegClassID; + case 512: + return AMDGPU::SGPR_512RegClassID; } } else if (Is == IS_AGPR) { switch (RegWidth) { default: return -1; - case 1: return AMDGPU::AGPR_32RegClassID; - case 2: return AMDGPU::AReg_64RegClassID; - case 3: return AMDGPU::AReg_96RegClassID; - case 4: return AMDGPU::AReg_128RegClassID; - case 5: return AMDGPU::AReg_160RegClassID; - case 6: return AMDGPU::AReg_192RegClassID; - case 7: return AMDGPU::AReg_224RegClassID; - case 8: return AMDGPU::AReg_256RegClassID; - case 16: return AMDGPU::AReg_512RegClassID; - case 32: return AMDGPU::AReg_1024RegClassID; + case 32: + return AMDGPU::AGPR_32RegClassID; + case 64: + return AMDGPU::AReg_64RegClassID; + case 96: + return AMDGPU::AReg_96RegClassID; + case 128: + return AMDGPU::AReg_128RegClassID; + case 160: + return AMDGPU::AReg_160RegClassID; + case 192: + return AMDGPU::AReg_192RegClassID; + case 224: + return AMDGPU::AReg_224RegClassID; + case 256: + return AMDGPU::AReg_256RegClassID; + case 512: + return AMDGPU::AReg_512RegClassID; + case 1024: + return AMDGPU::AReg_1024RegClassID; } } return -1; @@ -2343,32 +2494,32 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, case IS_SPECIAL: if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 
== AMDGPU::XNACK_MASK_HI) { Reg = AMDGPU::XNACK_MASK; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; - RegWidth = 2; + RegWidth = 64; return true; } if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; - RegWidth = 2; + RegWidth = 64; return true; } Error(Loc, "register does not fit in the list"); @@ -2377,11 +2528,11 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, case IS_SGPR: case IS_AGPR: case IS_TTMP: - if (Reg1 != Reg + RegWidth) { + if (Reg1 != Reg + RegWidth / 32) { Error(Loc, "registers in a list must have consecutive indices"); return false; } - RegWidth++; + RegWidth += 32; return true; default: llvm_unreachable("unexpected register kind"); @@ -2470,7 +2621,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, if (RegKind == IS_SGPR || RegKind == IS_TTMP) { // SGPR and TTMP registers must be aligned. // Max required alignment is 4 dwords. 
- AlignSize = std::min(RegWidth, 4u); + AlignSize = std::min(RegWidth / 32, 4u); } if (RegNum % AlignSize != 0) { @@ -2495,8 +2646,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, return RC.getRegister(RegIdx); } -bool -AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { +bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { int64_t RegLo, RegHi; if (!skipToken(AsmToken::LBrac, "missing register index")) return false; @@ -2534,7 +2684,7 @@ AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { } Num = static_cast<unsigned>(RegLo); - Width = (RegHi - RegLo) + 1; + RegWidth = 32 * ((RegHi - RegLo) + 1); return true; } @@ -2545,7 +2695,7 @@ unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, unsigned Reg = getSpecialRegForName(getTokenStr()); if (Reg) { RegNum = 0; - RegWidth = 1; + RegWidth = 32; RegKind = IS_SPECIAL; Tokens.push_back(getToken()); lex(); // skip register name @@ -2577,7 +2727,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, Error(Loc, "invalid register index"); return AMDGPU::NoRegister; } - RegWidth = 1; + RegWidth = 32; } else { // Range of registers: v[XX:YY]. ":YY" is optional. 
if (!ParseRegRange(RegNum, RegWidth)) @@ -2603,7 +2753,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, auto Loc = getLoc(); if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) return AMDGPU::NoRegister; - if (RegWidth != 1) { + if (RegWidth != 32) { Error(Loc, "expected a single 32-bit register"); return AMDGPU::NoRegister; } @@ -2618,7 +2768,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, Tokens)) { return AMDGPU::NoRegister; } - if (NextRegWidth != 1) { + if (NextRegWidth != 32) { Error(Loc, "expected a single 32-bit register"); return AMDGPU::NoRegister; } @@ -2721,7 +2871,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, return true; MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); - int64_t NewMax = DwordRegIndex + RegWidth - 1; + int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; int64_t OldCount; if (!Sym->isVariable()) @@ -2761,7 +2911,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { // TODO: add syntactic sugar for 1/(2*PI) - assert(!isRegister()); + if (isRegister()) + return MatchOperand_NoMatch; assert(!isModifier()); const auto& Tok = getToken(); @@ -2927,7 +3078,7 @@ AMDGPUAsmParser::isModifier() { // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 // Negative fp literals with preceding "-" are -// handled likewise for unifomtity +// handled likewise for uniformity // bool AMDGPUAsmParser::parseSP3NegModifier() { @@ -3110,7 +3261,8 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { static ArrayRef<unsigned> getAllVariants() { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, + AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP }; return 
makeArrayRef(Variants); @@ -3118,6 +3270,10 @@ static ArrayRef<unsigned> getAllVariants() { // What asm variants we should check ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { + if (isForcedDPP() && isForcedVOP3()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; + return makeArrayRef(Variants); + } if (getForcedEncodingSize() == 32) { static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; return makeArrayRef(Variants); @@ -3143,6 +3299,9 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { } StringRef AMDGPUAsmParser::getMatchedVariantName() const { + if (isForcedDPP() && isForcedVOP3()) + return "e64_dpp"; + if (getForcedEncodingSize() == 32) return "e32"; @@ -3231,10 +3390,13 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { // 64-bit shift instructions can use only one scalar value input case AMDGPU::V_LSHLREV_B64_e64: case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHLREV_B64_e64_gfx11: case AMDGPU::V_LSHRREV_B64_e64: case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHRREV_B64_e64_gfx11: case AMDGPU::V_ASHRREV_I64_e64: case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHRREV_I64_e64_gfx11: case AMDGPU::V_LSHL_B64_e64: case AMDGPU::V_LSHR_B64_e64: case AMDGPU::V_ASHR_I64_e64: @@ -3305,8 +3467,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, // flat_scratch_lo, flat_scratch_hi // are theoretically valid but they are disabled anyway. 
// Note that this code mimics SIInstrInfo::verifyInstruction - if (!SGPRsUsed.count(LastSGPR)) { - SGPRsUsed.insert(LastSGPR); + if (SGPRsUsed.insert(LastSGPR).second) { ++ConstantBusUseCount; } } else { // Expression or a literal @@ -3369,7 +3530,6 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, assert(DstIdx != -1); const MCOperand &Dst = Inst.getOperand(DstIdx); assert(Dst.isReg()); - const unsigned DstReg = mc2PseudoReg(Dst.getReg()); const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; @@ -3377,8 +3537,8 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, if (SrcIdx == -1) break; const MCOperand &Src = Inst.getOperand(SrcIdx); if (Src.isReg()) { - const unsigned SrcReg = mc2PseudoReg(Src.getReg()); - if (isRegIntersect(DstReg, SrcReg, TRI)) { + if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); Error(getRegLoc(SrcReg, Operands), "destination must be different than all sources"); return false; @@ -3403,13 +3563,13 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { return true; } -bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { +Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opc); if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) - return true; + return None; int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); @@ -3418,7 +3578,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { assert(VDataIdx != -1); if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray - return true; + return None; unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 
1 : 0; @@ -3426,15 +3586,22 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { if (DMask == 0) DMask = 1; + bool isPackedD16 = false; unsigned DataSize = (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); if (hasPackedD16()) { int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); - if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) + isPackedD16 = D16Idx >= 0; + if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) DataSize = (DataSize + 1) / 2; } - return (VDataSize / 4) == DataSize + TFESize; + if ((VDataSize / 4) == DataSize + TFESize) + return None; + + return StringRef(isPackedD16 + ? "image data size does not match dmask, d16 and tfe" + : "image data size does not match dmask and tfe"); } bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { @@ -3607,7 +3774,7 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, auto Reg = mc2PseudoReg(Src0.getReg()); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (isSGPR(Reg, TRI)) { + if (!isGFX90A() && isSGPR(Reg, TRI)) { Error(getRegLoc(Reg, Operands), "source operand must be either a VGPR or an inline constant"); return false; @@ -3641,7 +3808,7 @@ bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) return true; - if (isRegIntersect(Src2Reg, DstReg, TRI)) { + if (TRI->regsOverlap(Src2Reg, DstReg)) { Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), "source 2 operand must not partially overlap with dst"); return false; @@ -3861,7 +4028,7 @@ Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { const auto &Src = Inst.getOperand(SrcIdx); if (Src.isReg() && Src.getReg() == LDS_DIRECT) { - if (isGFX90A()) + if (isGFX90A() || isGFX11Plus()) return StringRef("lds_direct is not supported on this GPU"); if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) @@ -4009,6 +4176,20 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 
if (OpSel & ~3) return false; } + + if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx != -1) { + if (Inst.getOperand(OpSelIdx).getImm() != 0) + return false; + } + int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); + if (OpSelHiIdx != -1) { + if (Inst.getOperand(OpSelHiIdx).getImm() != -1) + return false; + } + } + return true; } @@ -4179,6 +4360,47 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { return true; } +SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isBLGP()) + return Op.getStartLoc(); + } + return SMLoc(); +} + +bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, + const OperandVector &Operands) { + unsigned Opc = Inst.getOpcode(); + int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); + if (BlgpIdx == -1) + return true; + SMLoc BLGPLoc = getBLGPLoc(Operands); + if (!BLGPLoc.isValid()) + return true; + bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); + auto FB = getFeatureBits(); + bool UsesNeg = false; + if (FB[AMDGPU::FeatureGFX940Insts]) { + switch (Opc) { + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: + UsesNeg = true; + } + } + + if (IsNeg == UsesNeg) + return true; + + Error(BLGPLoc, + UsesNeg ? "invalid modifier: blgp is not supported" + : "invalid modifier: neg is not supported"); + + return false; +} + // gfx90a has an undocumented limitation: // DS_GWS opcodes must use even aligned registers. 
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, @@ -4218,13 +4440,19 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, unsigned CPol = Inst.getOperand(CPolPos).getImm(); uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - if ((TSFlags & (SIInstrFlags::SMRD)) && - (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { - Error(IDLoc, "invalid cache policy for SMRD instruction"); - return false; + if (TSFlags & SIInstrFlags::SMRD) { + if (CPol && (isSI() || isCI())) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + Error(S, "cache policy is not supported for SMRD instructions"); + return false; + } + if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { + Error(IDLoc, "invalid cache policy for SMEM instruction"); + return false; + } } - if (isGFX90A() && (CPol & CPol::SCC)) { + if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); @@ -4237,15 +4465,18 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, if (TSFlags & SIInstrFlags::IsAtomicRet) { if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { - Error(IDLoc, "instruction must use glc"); + Error(IDLoc, isGFX940() ? "instruction must use sc0" + : "instruction must use glc"); return false; } } else { if (CPol & CPol::GLC) { SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); - S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); - Error(S, "instruction must not use glc"); + S = SMLoc::getFromPointer( + &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); + Error(S, isGFX940() ? 
"instruction must not use sc0" + : "instruction must not use glc"); return false; } } @@ -4253,6 +4484,47 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, return true; } +bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, + const OperandVector &Operands, + const SMLoc &IDLoc) { + if (isGFX940()) + return true; + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != + (SIInstrFlags::VALU | SIInstrFlags::FLAT)) + return true; + // This is FLAT LDS DMA. + + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); + StringRef CStr(S.getPointer()); + if (!CStr.startswith("lds")) { + // This is incorrectly selected LDS DMA version of a FLAT load opcode. + // And LDS version should have 'lds' modifier, but it follows optional + // operands so its absence is ignored by the matcher. + Error(IDLoc, "invalid operands for instruction"); + return false; + } + + return true; +} + +bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { + if (!isGFX11Plus()) + return true; + for (auto &Operand : Operands) { + if (!Operand->isReg()) + continue; + unsigned Reg = Operand->getReg(); + if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { + Error(getRegLoc(Reg, Operands), + "execz and vccz are not supported on this GPU"); + return false; + } + } + return true; +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands) { @@ -4302,9 +4574,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "invalid dim; must be MSAA type"); return false; } - if (!validateMIMGDataSize(Inst)) { - Error(IDLoc, - "image data size does not match dmask and tfe"); + if (auto ErrMsg = validateMIMGDataSize(Inst)) { + Error(IDLoc, *ErrMsg); return false; } if (!validateMIMGAddrSize(Inst)) { @@ -4357,6 +4628,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, return false; } + if (!validateBLGP(Inst, Operands)) { + return 
false; + } + if (!validateDivScale(Inst)) { Error(IDLoc, "ABS not allowed in VOP3B instructions"); return false; @@ -4364,6 +4639,13 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateCoherencyBits(Inst, Operands, IDLoc)) { return false; } + if (!validateExeczVcczOperands(Operands)) { + return false; + } + + if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { + return false; + } return true; } @@ -4606,6 +4888,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { SMRange VGPRRange; uint64_t NextFreeVGPR = 0; uint64_t AccumOffset = 0; + uint64_t SharedVGPRCount = 0; SMRange SGPRRange; uint64_t NextFreeSGPR = 0; @@ -4630,9 +4913,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (ID == ".end_amdhsa_kernel") break; - if (Seen.find(ID) != Seen.end()) + if (!Seen.insert(ID).second) return TokError(".amdhsa_ directives cannot be repeated"); - Seen.insert(ID); SMLoc ValStart = getLoc(); int64_t IVal; @@ -4833,6 +5115,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return Error(IDRange.Start, "directive requires gfx10+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, ValRange); + } else if (ID == ".amdhsa_shared_vgpr_count") { + if (IVersion.Major < 10) + return Error(IDRange.Start, "directive requires gfx10+", IDRange); + SharedVGPRCount = Val; + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, + ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -4922,6 +5211,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { (AccumOffset / 4 - 1)); } + if (IVersion.Major == 10) { + // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY + if (SharedVGPRCount && EnableWavefrontSize32) { + return TokError("shared_vgpr_count directive not valid on " + "wavefront size 32"); + } + if (SharedVGPRCount * 2 + VGPRBlocks > 63) { + return TokError("shared_vgpr_count*2 + " 
"compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " + "exceed 63\n"); + } + } + getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, ReserveFlatScr); @@ -5253,8 +5555,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { return Error(AlignLoc, "alignment is too large"); } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.amdgpu_lds' directive")) + if (parseEOL()) return true; Symbol->redefineIfPossible(); @@ -5313,26 +5614,21 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) { - for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return isGFX9Plus(); - } + if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) + return isGFX9Plus(); - // GFX10 has 2 more SGPRs 104 and 105. - for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return hasSGPR104_SGPR105(); - } + // GFX10+ has 2 more SGPRs 104 and 105. + if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) + return hasSGPR104_SGPR105(); switch (RegNo) { case AMDGPU::SRC_SHARED_BASE: case AMDGPU::SRC_SHARED_LIMIT: case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_LIMIT: - case AMDGPU::SRC_POPS_EXITING_WAVE_ID: return isGFX9Plus(); + case AMDGPU::SRC_POPS_EXITING_WAVE_ID: + return isGFX9Plus() && !isGFX11Plus(); case AMDGPU::TBA: case AMDGPU::TBA_LO: case AMDGPU::TBA_HI: @@ -5355,7 +5651,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, if (isSI() || isGFX10Plus()) { // No flat_scr on SI. - // On GFX10 flat scratch is not a valid register operand and can only be + // On GFX10Plus flat scratch is not a valid register operand and can only be // accessed with s_setreg/s_getreg. 
switch (RegNo) { case AMDGPU::FLAT_SCR: @@ -5369,11 +5665,8 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that // SI/CI have. - for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); - R.isValid(); ++R) { - if (*R == RegNo) - return hasSGPR102_SGPR103(); - } + if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) + return hasSGPR102_SGPR103(); return true; } @@ -5381,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, OperandMode Mode) { + OperandMatchResultTy ResTy = parseVOPD(Operands); + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + isToken(AsmToken::EndOfStatement)) + return ResTy; + // Try to parse with a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + ResTy = MatchOperandParserImpl(Operands, Mnemonic); // If we successfully parsed the operand or if there as an error parsing, // we are done. 
@@ -5435,7 +5733,11 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { setForcedDPP(false); setForcedSDWA(false); - if (Name.endswith("_e64")) { + if (Name.endswith("_e64_dpp")) { + setForcedDPP(true); + setForcedEncodingSize(64); + return Name.substr(0, Name.size() - 8); + } else if (Name.endswith("_e64")) { setForcedEncodingSize(64); return Name.substr(0, Name.size() - 4); } else if (Name.endswith("_e32")) { @@ -5451,11 +5753,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { return Name; } +static void applyMnemonicAliases(StringRef &Mnemonic, + const FeatureBitset &Features, + unsigned VariantID); + bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // Add the instruction mnemonic Name = parseMnemonicSuffix(Name); + + // If the target architecture uses MnemonicAlias, call it here to parse + // operands correctly. + applyMnemonicAliases(Name, getAvailableFeatures(), 0); + Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); bool IsMIMG = Name.startswith("image_"); @@ -5603,7 +5914,24 @@ AMDGPUAsmParser::parseCPol(OperandVector &Operands) { unsigned CPolOff = 0; SMLoc S = getLoc(); - if (trySkipId("glc")) + StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); + if (isGFX940() && !Mnemo.startswith("s_")) { + if (trySkipId("sc0")) + CPolOn = AMDGPU::CPol::SC0; + else if (trySkipId("nosc0")) + CPolOff = AMDGPU::CPol::SC0; + else if (trySkipId("nt")) + CPolOn = AMDGPU::CPol::NT; + else if (trySkipId("nont")) + CPolOff = AMDGPU::CPol::NT; + else if (trySkipId("sc1")) + CPolOn = AMDGPU::CPol::SC1; + else if (trySkipId("nosc1")) + CPolOff = AMDGPU::CPol::SC1; + else + return MatchOperand_NoMatch; + } + else if (trySkipId("glc")) CPolOn = AMDGPU::CPol::GLC; else if (trySkipId("noglc")) CPolOff = AMDGPU::CPol::GLC; @@ -5809,7 +6137,7 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; if (isGFX10Plus()) { - auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); + auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); if (Ufmt == UFMT_UNDEF) { Error(FormatLoc, "unsupported format"); return MatchOperand_ParseFail; @@ -5828,7 +6156,7 @@ AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, int64_t &Format) { using namespace llvm::AMDGPU::MTBUFFormat; - auto Id = getUnifiedFormat(FormatStr); + auto Id = getUnifiedFormat(FormatStr, getSTI()); if (Id == UFMT_UNDEF) return MatchOperand_NoMatch; @@ -5969,6 +6297,7 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, bool IsGdsHardcoded) { OptionalImmIndexMap OptionalIdx; + AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -5986,13 +6315,10 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, // Handle optional arguments OptionalIdx[Op.getImmTy()] = i; - } - AMDGPUOperand::ImmTy OffsetType = - (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || - Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || - Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : - AMDGPUOperand::ImmTyOffset; + if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) + OffsetType = AMDGPUOperand::ImmTySwizzle; + } addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); @@ -6034,7 +6360,7 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { continue; } - if (Op.isToken() && Op.getToken() == "done") + if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) continue; // Handle optional arguments @@ -6157,11 +6483,179 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { return MatchOperand_Success; } +bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { + SMLoc FieldLoc = getLoc(); + StringRef FieldName = getTokenStr(); + if (!skipToken(AsmToken::Identifier, "expected a field name") || + !skipToken(AsmToken::LParen, "expected a left parenthesis")) + return false; + + SMLoc ValueLoc = getLoc(); + StringRef ValueName = getTokenStr(); + if (!skipToken(AsmToken::Identifier, "expected a value name") || + !skipToken(AsmToken::RParen, "expected a right parenthesis")) + return false; + + unsigned Shift; + if (FieldName == "instid0") { + Shift = 0; + } else if (FieldName == "instskip") { + Shift = 4; + } else if (FieldName == "instid1") { + Shift = 7; + } else { + Error(FieldLoc, "invalid field name " + FieldName); + return false; + } + + int Value; + if (Shift == 4) { + // Parse values for instskip. + Value = StringSwitch<int>(ValueName) + .Case("SAME", 0) + .Case("NEXT", 1) + .Case("SKIP_1", 2) + .Case("SKIP_2", 3) + .Case("SKIP_3", 4) + .Case("SKIP_4", 5) + .Default(-1); + } else { + // Parse values for instid0 and instid1. 
+ Value = StringSwitch<int>(ValueName) + .Case("NO_DEP", 0) + .Case("VALU_DEP_1", 1) + .Case("VALU_DEP_2", 2) + .Case("VALU_DEP_3", 3) + .Case("VALU_DEP_4", 4) + .Case("TRANS32_DEP_1", 5) + .Case("TRANS32_DEP_2", 6) + .Case("TRANS32_DEP_3", 7) + .Case("FMA_ACCUM_CYCLE_1", 8) + .Case("SALU_CYCLE_1", 9) + .Case("SALU_CYCLE_2", 10) + .Case("SALU_CYCLE_3", 11) + .Default(-1); + } + if (Value < 0) { + Error(ValueLoc, "invalid value name " + ValueName); + return false; + } + + Delay |= Value << Shift; + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { + int64_t Delay = 0; + SMLoc S = getLoc(); + + if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { + do { + if (!parseDelay(Delay)) + return MatchOperand_ParseFail; + } while (trySkipToken(AsmToken::Pipe)); + } else { + if (!parseExpr(Delay)) + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); + return MatchOperand_Success; +} + bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } + +//===----------------------------------------------------------------------===// +// DepCtr +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, + StringRef DepCtrName) { + switch (ErrorId) { + case OPR_ID_UNKNOWN: + Error(Loc, Twine("invalid counter name ", DepCtrName)); + return; + case OPR_ID_UNSUPPORTED: + Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); + return; + case OPR_ID_DUPLICATE: + Error(Loc, Twine("duplicate counter name ", DepCtrName)); + return; + case OPR_VAL_INVALID: + Error(Loc, Twine("invalid value for ", DepCtrName)); + return; + default: + assert(false); + } +} + +bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { + + using namespace llvm::AMDGPU::DepCtr; + + SMLoc DepCtrLoc = getLoc(); + StringRef 
DepCtrName = getTokenStr(); + + if (!skipToken(AsmToken::Identifier, "expected a counter name") || + !skipToken(AsmToken::LParen, "expected a left parenthesis")) + return false; + + int64_t ExprVal; + if (!parseExpr(ExprVal)) + return false; + + unsigned PrevOprMask = UsedOprMask; + int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); + + if (CntVal < 0) { + depCtrError(DepCtrLoc, CntVal, DepCtrName); + return false; + } + + if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) + return false; + + if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { + if (isToken(AsmToken::EndOfStatement)) { + Error(getLoc(), "expected a counter name"); + return false; + } + } + + unsigned CntValMask = PrevOprMask ^ UsedOprMask; + DepCtr = (DepCtr & ~CntValMask) | CntVal; + return true; +} + +OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { + using namespace llvm::AMDGPU::DepCtr; + + int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); + SMLoc Loc = getLoc(); + + if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { + unsigned UsedOprMask = 0; + while (!isToken(AsmToken::EndOfStatement)) { + if (!parseDepCtr(DepCtr, UsedOprMask)) + return MatchOperand_ParseFail; + } + } else { + if (!parseExpr(DepCtr)) + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } + //===----------------------------------------------------------------------===// // hwreg //===----------------------------------------------------------------------===// @@ -6175,7 +6669,7 @@ AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, // The register may be specified by name or using a numeric code HwReg.Loc = getLoc(); if (isToken(AsmToken::Identifier) && - (HwReg.Id = getHwregId(getTokenStr())) >= 0) { + (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 
HwReg.IsSymbolic = true; lex(); // skip register name } else if (!parseExpr(HwReg.Id, "a register name")) { @@ -6208,15 +6702,18 @@ AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, using namespace llvm::AMDGPU::Hwreg; - if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { - Error(HwReg.Loc, - "specified hardware register is not supported on this GPU"); - return false; - } - if (!isValidHwreg(HwReg.Id)) { - Error(HwReg.Loc, - "invalid code of hardware register: only 6-bit values are legal"); - return false; + if (HwReg.IsSymbolic) { + if (HwReg.Id == OPR_ID_UNSUPPORTED) { + Error(HwReg.Loc, + "specified hardware register is not supported on this GPU"); + return false; + } + } else { + if (!isValidHwreg(HwReg.Id)) { + Error(HwReg.Loc, + "invalid code of hardware register: only 6-bit values are legal"); + return false; + } } if (!isValidHwregOffset(Offset.Id)) { Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); @@ -6238,7 +6735,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { SMLoc Loc = getLoc(); if (trySkipId("hwreg", AsmToken::LParen)) { - OperandInfoTy HwReg(ID_UNKNOWN_); + OperandInfoTy HwReg(OPR_ID_UNKNOWN); OperandInfoTy Offset(OFFSET_DEFAULT_); OperandInfoTy Width(WIDTH_DEFAULT_); if (parseHwregBody(HwReg, Offset, Width) && @@ -6275,7 +6772,8 @@ AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, using namespace llvm::AMDGPU::SendMsg; Msg.Loc = getLoc(); - if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { + if (isToken(AsmToken::Identifier) && + (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { Msg.IsSymbolic = true; lex(); // skip message name } else if (!parseExpr(Msg.Id, "a message name")) { @@ -6310,15 +6808,22 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, using namespace llvm::AMDGPU::SendMsg; // Validation strictness depends on whether message is specified - // in a symbolc or in a numeric form. 
In the latter case + // in a symbolic or in a numeric form. In the latter case // only encoding possibility is checked. bool Strict = Msg.IsSymbolic; - if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { - Error(Msg.Loc, "invalid message id"); - return false; + if (Strict) { + if (Msg.Id == OPR_ID_UNSUPPORTED) { + Error(Msg.Loc, "specified message id is not supported on this GPU"); + return false; + } + } else { + if (!isValidMsgId(Msg.Id, getSTI())) { + Error(Msg.Loc, "invalid message id"); + return false; + } } - if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { + if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { if (Op.IsDefined) { Error(Op.Loc, "message does not support operations"); } else { @@ -6330,7 +6835,8 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, Error(Op.Loc, "invalid operation id"); return false; } - if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { + if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && + Stream.IsDefined) { Error(Stream.Loc, "message operation does not support streams"); return false; } @@ -6349,7 +6855,7 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { SMLoc Loc = getLoc(); if (trySkipId("sendmsg", AsmToken::LParen)) { - OperandInfoTy Msg(ID_UNKNOWN_); + OperandInfoTy Msg(OPR_ID_UNKNOWN); OperandInfoTy Op(OP_NONE_); OperandInfoTy Stream(STREAM_ID_NONE_); if (parseSendMsgBody(Msg, Op, Stream) && @@ -6610,9 +7116,10 @@ AMDGPUAsmParser::getToken() const { return Parser.getTok(); } -AsmToken -AMDGPUAsmParser::peekToken() { - return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); +AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { + return isToken(AsmToken::EndOfStatement) + ? 
getToken() + : getLexer().peekTok(ShouldSkipSpace); } void @@ -7078,8 +7585,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsLds) { - bool IsLdsOpcode = IsLds; - bool HasLdsModifier = false; OptionalImmIndexMap OptionalIdx; unsigned FirstOperandIdx = 1; bool IsAtomicReturn = false; @@ -7123,8 +7628,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, continue; } - HasLdsModifier |= Op.isLDS(); - // Handle tokens like 'offen' which are sometimes hard-coded into the // asm string. There are no MCInst operands for these. if (Op.isToken()) { @@ -7136,25 +7639,10 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, OptionalIdx[Op.getImmTy()] = i; } - // This is a workaround for an llvm quirk which may result in an - // incorrect instruction selection. Lds and non-lds versions of - // MUBUF instructions are identical except that lds versions - // have mandatory 'lds' modifier. However this modifier follows - // optional modifiers and llvm asm matcher regards this 'lds' - // modifier as an optional one. As a result, an lds version - // of opcode may be selected even if it has no 'lds' modifier. - if (IsLdsOpcode && !HasLdsModifier) { - int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); - if (NoLdsOpcode != -1) { // Got lds version - correct it. - Inst.setOpcode(NoLdsOpcode); - IsLdsOpcode = false; - } - } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); - if (!IsLdsOpcode) { // tfe is not legal with lds opcodes + if (!IsLds) { // tfe is not legal with lds opcodes addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); @@ -7327,7 +7815,8 @@ bool AMDGPUOperand::isSMRDOffset8() const { } bool AMDGPUOperand::isSMEMOffset() const { - return isImm(); // Offset range is checked later by validator. 
+ return isImmTy(ImmTyNone) || + isImmTy(ImmTyOffset); // Offset range is checked later by validator. } bool AMDGPUOperand::isSMRDLiteralOffset() const { @@ -7415,10 +7904,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, - {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, - {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, - {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, - {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, @@ -7429,9 +7914,17 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, + {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, + {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, + {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, - {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} + {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, + {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, + {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} }; void AMDGPUAsmParser::onBeginOfFile() { @@ -7497,8 +7990,17 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) res = parseDim(Operands); } else if (Op.Type == 
AMDGPUOperand::ImmTyCPol) { res = parseCPol(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { + res = parseDPP8(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { + res = parseDPPCtrl(Operands); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); + if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { + res = parseOperandArrayWithPrefix("neg", Operands, + AMDGPUOperand::ImmTyBLGP, + nullptr); + } } if (res != MatchOperand_NoMatch) { return res; @@ -7596,6 +8098,66 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) +{ + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx != -1) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); + + if (OpSelIdx == -1) + return; + + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; + const int ModOps[] = { AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers }; + + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + 
+ for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + break; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + if (ModOps[J] == AMDGPU::OpName::src0_modifiers && + (OpSel & (1 << 3)) != 0) + ModVal |= SISrcMods::DST_OP_SEL; + + Inst.getOperand(ModIdx).setImm(ModVal); + } +} + void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx) { unsigned Opc = Inst.getOpcode(); @@ -7652,9 +8214,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { + Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || + Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 @@ -7731,6 +8296,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if (OpIdx == -1) break; + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + + if (ModIdx == -1) + continue; + uint32_t ModVal = 0; if ((OpSel & (1 << J)) != 0) @@ -7745,8 +8315,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, if ((NegHi & (1 << J)) != 0) ModVal |= SISrcMods::NEG_HI; - int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); } } @@ -7758,6 +8326,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { } 
//===----------------------------------------------------------------------===// +// VOPD +//===----------------------------------------------------------------------===// + +OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { + if (!hasVOPD(getSTI())) + return MatchOperand_NoMatch; + + if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { + SMLoc S = getLoc(); + lex(); + lex(); + Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); + const MCExpr *Expr; + if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + return MatchOperand_Success; + } + Error(S, "invalid VOPD :: usage"); + return MatchOperand_ParseFail; + } + return MatchOperand_NoMatch; +} + +// Create VOPD MCInst operands using parsed assembler operands. +// Parsed VOPD operands are ordered as follows: +// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' +// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] +// If both OpX and OpY have an imm, the first imm has a different name: +// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' +// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] +// MCInst operands have the following order: +// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] +void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { + auto addOp = [&](uint16_t i) { // NOLINT:function pointer + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + return; + } + if (Op.isImm()) { + Op.addImmOperands(Inst, 1); + return; + } + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + return; + } + llvm_unreachable("Unhandled operand type in cvtVOPD"); + }; + + // Indices into MCInst.Operands + const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 
+ const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... + const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... + + unsigned Opc = Inst.getOpcode(); + bool HasVsrc1X = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; + bool HasImmX = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || + (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == + FmamkOpXImmMCIndex || + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == + FmaakOpXImmMCIndex)); + + bool HasVsrc1Y = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; + bool HasImmY = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= + MinOpYImmMCIndex + HasVsrc1X; + + // Indices of parsed operands relative to dst + const auto DstIdx = 0; + const auto Src0Idx = 1; + const auto Vsrc1OrImmIdx = 2; + + const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; + const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) + + // Offsets into parsed operands + const auto OpXFirstOperandOffset = 1; + const auto OpYFirstOperandOffset = + OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; + + // Order of addOp calls determines MC operand order + addOp(OpXFirstOperandOffset + DstIdx); // vdstX + addOp(OpYFirstOperandOffset + DstIdx); // vdstY + + addOp(OpXFirstOperandOffset + Src0Idx); // src0X + if (HasImmX) { + // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); + } else { + if (HasVsrc1X) // all except v_mov + addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X + } + + addOp(OpYFirstOperandOffset + Src0Idx); // src0Y + if (HasImmY) { + // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); + } else { + if (HasVsrc1Y) 
// all except v_mov + addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y + } +} + +//===----------------------------------------------------------------------===// // dpp //===----------------------------------------------------------------------===// @@ -8067,6 +8747,88 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); } +// Add dummy $old operand +void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, + const OperandVector &Operands, + bool IsDPP8) { + Inst.addOperand(MCOperand::createReg(0)); + cvtVOP3DPP(Inst, Operands, IsDPP8); +} + +void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + int Fi = 0; + for (unsigned E = Operands.size(); I != E; ++I) { + auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), + MCOI::TIED_TO); + if (TiedTo != -1) { + assert((unsigned)TiedTo < Inst.getNumOperands()); + // handle tied old or src2 for MAC instructions + Inst.addOperand(Inst.getOperand(TiedTo)); + } + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (IsDPP8 && Op.isFI()) { + Fi = Op.getImm(); + } else if (HasModifiers && + isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + } else if (Op.isImm() && + Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { + assert(!HasModifiers && "Case should be unreachable with modifiers"); + assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); + Op.addImmOperands(Inst, 1); + } else if 
(Op.isImm()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } + if (Desc.TSFlags & SIInstrFlags::VOP3P) + cvtVOP3P(Inst, Operands, OptionalIdx); + else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); + } + + if (IsDPP8) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); + using namespace llvm::AMDGPU::DPP; + Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); + } else { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); + } + } +} + +// Add dummy $old operand +void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, + const OperandVector &Operands, + bool IsDPP8) { + Inst.addOperand(MCOperand::createReg(0)); + cvtDPP(Inst, Operands, IsDPP8); +} + void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; @@ -8352,7 +9114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { #define GET_MNEMONIC_CHECKER #include "AMDGPUGenAsmMatcher.inc" -// This fuction should be defined after auto-generated include so that we have +// 
This function should be defined after auto-generated include so that we have // MatchClassKind enum defined unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) { @@ -8431,3 +9193,27 @@ OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { } bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } + +//===----------------------------------------------------------------------===// +// LDSDIR +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); +} + +bool AMDGPUOperand::isWaitVDST() const { + return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); +} + +//===----------------------------------------------------------------------===// +// VINTERP +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP); +} + +bool AMDGPUOperand::isWaitEXP() const { + return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); +} |
