| author | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-12-20 19:53:05 +0000 |
| commit | 0b57cec536236d46e3dba9bd041533462f33dbb7 (patch) | |
| tree | 56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
| parent | 718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
| Mode | Path | Lines |
|---|---|---|
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6880 |

1 file changed, 6880 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp new file mode 100644 index 000000000000..6d678966c98e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -0,0 +1,6880 @@ +//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDKernelCodeT.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "SIDefines.h" +#include "SIInstrInfo.h" +#include "TargetInfo/AMDGPUTargetInfo.h" +#include "Utils/AMDGPUAsmUtils.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "Utils/AMDKernelCodeTUtils.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetParser.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <map> +#include <memory> +#include <string> + +using namespace llvm; +using namespace llvm::AMDGPU; +using namespace llvm::amdhsa; + +namespace { + +class AMDGPUAsmParser; + +enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; + +//===----------------------------------------------------------------------===// +// Operand +//===----------------------------------------------------------------------===// + +class AMDGPUOperand : public MCParsedAsmOperand { + enum KindTy { + Token, + Immediate, + Register, + Expression + } Kind; + + SMLoc StartLoc, EndLoc; + const AMDGPUAsmParser *AsmParser; + +public: + AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) + : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} + + using Ptr = std::unique_ptr<AMDGPUOperand>; + + struct Modifiers { + bool Abs = false; + bool Neg = false; + bool Sext = false; + + bool hasFPModifiers() const { return Abs || Neg; } + bool hasIntModifiers() const { return Sext; } + bool hasModifiers() const { return 
hasFPModifiers() || hasIntModifiers(); } + + int64_t getFPModifiersOperand() const { + int64_t Operand = 0; + Operand |= Abs ? SISrcMods::ABS : 0u; + Operand |= Neg ? SISrcMods::NEG : 0u; + return Operand; + } + + int64_t getIntModifiersOperand() const { + int64_t Operand = 0; + Operand |= Sext ? SISrcMods::SEXT : 0u; + return Operand; + } + + int64_t getModifiersOperand() const { + assert(!(hasFPModifiers() && hasIntModifiers()) + && "fp and int modifiers should not be used simultaneously"); + if (hasFPModifiers()) { + return getFPModifiersOperand(); + } else if (hasIntModifiers()) { + return getIntModifiersOperand(); + } else { + return 0; + } + } + + friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); + }; + + enum ImmTy { + ImmTyNone, + ImmTyGDS, + ImmTyLDS, + ImmTyOffen, + ImmTyIdxen, + ImmTyAddr64, + ImmTyOffset, + ImmTyInstOffset, + ImmTyOffset0, + ImmTyOffset1, + ImmTyDLC, + ImmTyGLC, + ImmTySLC, + ImmTyTFE, + ImmTyD16, + ImmTyClampSI, + ImmTyOModSI, + ImmTyDPP8, + ImmTyDppCtrl, + ImmTyDppRowMask, + ImmTyDppBankMask, + ImmTyDppBoundCtrl, + ImmTyDppFi, + ImmTySdwaDstSel, + ImmTySdwaSrc0Sel, + ImmTySdwaSrc1Sel, + ImmTySdwaDstUnused, + ImmTyDMask, + ImmTyDim, + ImmTyUNorm, + ImmTyDA, + ImmTyR128A16, + ImmTyLWE, + ImmTyExpTgt, + ImmTyExpCompr, + ImmTyExpVM, + ImmTyFORMAT, + ImmTyHwreg, + ImmTyOff, + ImmTySendMsg, + ImmTyInterpSlot, + ImmTyInterpAttr, + ImmTyAttrChan, + ImmTyOpSel, + ImmTyOpSelHi, + ImmTyNegLo, + ImmTyNegHi, + ImmTySwizzle, + ImmTyGprIdxMode, + ImmTyHigh, + ImmTyBLGP, + ImmTyCBSZ, + ImmTyABID, + ImmTyEndpgm, + }; + +private: + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct ImmOp { + int64_t Val; + ImmTy Type; + bool IsFPImm; + Modifiers Mods; + }; + + struct RegOp { + unsigned RegNo; + Modifiers Mods; + }; + + union { + TokOp Tok; + ImmOp Imm; + RegOp Reg; + const MCExpr *Expr; + }; + +public: + bool isToken() const override { + if (Kind == Token) + return true; + + if (Kind != Expression || !Expr) + return false; + + // When parsing operands, we can't always tell if something was meant to be + // a token, like 'gds', or an expression that references a global variable. + // In this case, we assume the string is an expression, and if we need to + // interpret is a token, then we treat the symbol name as the token. 
+ return isa<MCSymbolRefExpr>(Expr); + } + + bool isImm() const override { + return Kind == Immediate; + } + + bool isInlinableImm(MVT type) const; + bool isLiteralImm(MVT type) const; + + bool isRegKind() const { + return Kind == Register; + } + + bool isReg() const override { + return isRegKind() && !hasModifiers(); + } + + bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { + return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); + } + + bool isRegOrImmWithInt16InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); + } + + bool isRegOrImmWithInt32InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); + } + + bool isRegOrImmWithInt64InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); + } + + bool isRegOrImmWithFP16InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); + } + + bool isRegOrImmWithFP32InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); + } + + bool isRegOrImmWithFP64InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); + } + + bool isVReg() const { + return isRegClass(AMDGPU::VGPR_32RegClassID) || + isRegClass(AMDGPU::VReg_64RegClassID) || + isRegClass(AMDGPU::VReg_96RegClassID) || + isRegClass(AMDGPU::VReg_128RegClassID) || + isRegClass(AMDGPU::VReg_256RegClassID) || + isRegClass(AMDGPU::VReg_512RegClassID); + } + + bool isVReg32() const { + return isRegClass(AMDGPU::VGPR_32RegClassID); + } + + bool isVReg32OrOff() const { + return isOff() || isVReg32(); + } + + bool isSDWAOperand(MVT type) const; + bool isSDWAFP16Operand() const; + bool isSDWAFP32Operand() const; + bool isSDWAInt16Operand() const; + bool isSDWAInt32Operand() const; + + bool isImmTy(ImmTy ImmT) const { + return isImm() && Imm.Type == ImmT; + } + + bool isImmModifier() const { + return isImm() && Imm.Type != ImmTyNone; + } + + bool isClampSI() const { return isImmTy(ImmTyClampSI); } + bool isOModSI() const { return isImmTy(ImmTyOModSI); } + bool isDMask() const { return isImmTy(ImmTyDMask); } + bool isDim() const { return isImmTy(ImmTyDim); } + bool isUNorm() const { return isImmTy(ImmTyUNorm); } + bool isDA() const { return isImmTy(ImmTyDA); } + bool isR128A16() const { return isImmTy(ImmTyR128A16); } + bool isLWE() const { return isImmTy(ImmTyLWE); } + bool isOff() const { return isImmTy(ImmTyOff); } + bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } + bool isExpVM() const { return isImmTy(ImmTyExpVM); } + bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } + bool isOffen() const { return isImmTy(ImmTyOffen); } + bool isIdxen() const { return isImmTy(ImmTyIdxen); } + bool isAddr64() const { return isImmTy(ImmTyAddr64); } + bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } + bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } + bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } + bool isGDS() const { return isImmTy(ImmTyGDS); } + bool isLDS() const { return isImmTy(ImmTyLDS); } + bool isDLC() const { return isImmTy(ImmTyDLC); } + bool isGLC() const { return isImmTy(ImmTyGLC); } + bool isSLC() const { return isImmTy(ImmTySLC); } + bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isD16() const { return isImmTy(ImmTyD16); } + bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && 
isUInt<8>(getImm()); } + bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } + bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } + bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } + bool isFI() const { return isImmTy(ImmTyDppFi); } + bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } + bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } + bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } + bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } + bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } + bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } + bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } + bool isOpSel() const { return isImmTy(ImmTyOpSel); } + bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } + bool isNegLo() const { return isImmTy(ImmTyNegLo); } + bool isNegHi() const { return isImmTy(ImmTyNegHi); } + bool isHigh() const { return isImmTy(ImmTyHigh); } + + bool isMod() const { + return isClampSI() || isOModSI(); + } + + bool isRegOrImm() const { + return isReg() || isImm(); + } + + bool isRegClass(unsigned RCID) const; + + bool isInlineValue() const; + + bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { + return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); + } + + bool isSCSrcB16() const { + return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); + } + + bool isSCSrcV2B16() const { + return isSCSrcB16(); + } + + bool isSCSrcB32() const { + return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); + } + + bool isSCSrcB64() const { + return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); + } + + bool isBoolReg() const; + + bool isSCSrcF16() const { + return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); + } + + bool isSCSrcV2F16() const { + return isSCSrcF16(); + } + + bool isSCSrcF32() const { + return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); + } + + bool isSCSrcF64() const { + return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); + } + + bool isSSrcB32() const { + return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); + } + + bool isSSrcB16() const { + return isSCSrcB16() || isLiteralImm(MVT::i16); + } + + bool isSSrcV2B16() const { + llvm_unreachable("cannot happen"); + return isSSrcB16(); + } + + bool isSSrcB64() const { + // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. + // See isVSrc64(). 
+ return isSCSrcB64() || isLiteralImm(MVT::i64); + } + + bool isSSrcF32() const { + return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); + } + + bool isSSrcF64() const { + return isSCSrcB64() || isLiteralImm(MVT::f64); + } + + bool isSSrcF16() const { + return isSCSrcB16() || isLiteralImm(MVT::f16); + } + + bool isSSrcV2F16() const { + llvm_unreachable("cannot happen"); + return isSSrcF16(); + } + + bool isSSrcOrLdsB32() const { + return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || + isLiteralImm(MVT::i32) || isExpr(); + } + + bool isVCSrcB32() const { + return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); + } + + bool isVCSrcB64() const { + return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); + } + + bool isVCSrcB16() const { + return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); + } + + bool isVCSrcV2B16() const { + return isVCSrcB16(); + } + + bool isVCSrcF32() const { + return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); + } + + bool isVCSrcF64() const { + return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); + } + + bool isVCSrcF16() const { + return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); + } + + bool isVCSrcV2F16() const { + return isVCSrcF16(); + } + + bool isVSrcB32() const { + return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); + } + + bool isVSrcB64() const { + return isVCSrcF64() || isLiteralImm(MVT::i64); + } + + bool isVSrcB16() const { + return isVCSrcF16() || isLiteralImm(MVT::i16); + } + + bool isVSrcV2B16() const { + return isVSrcB16() || isLiteralImm(MVT::v2i16); + } + + bool isVSrcF32() const { + return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); + } + + bool isVSrcF64() const { + return isVCSrcF64() || isLiteralImm(MVT::f64); + } + + bool isVSrcF16() const { + return isVCSrcF16() || isLiteralImm(MVT::f16); + } + + bool isVSrcV2F16() const { + return isVSrcF16() || isLiteralImm(MVT::v2f16); + } + + bool isVISrcB32() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); + } + + bool isVISrcB16() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); + } + + bool isVISrcV2B16() const { + return isVISrcB16(); + } + + bool isVISrcF32() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); + } + + bool isVISrcF16() const { + return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); + } + + bool isVISrcV2F16() const { + return isVISrcF16() || isVISrcB32(); + } + + bool isAISrcB32() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); + } + + bool isAISrcB16() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); + } + + bool isAISrcV2B16() const { + return isAISrcB16(); + } + + bool isAISrcF32() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); + } + + bool isAISrcF16() const { + return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); + } + + bool isAISrcV2F16() const { + return isAISrcF16() || isAISrcB32(); + } + + bool isAISrc_128B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); + } + + bool isAISrc_128B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); + } + + bool isAISrc_128V2B16() const { + return isAISrc_128B16(); + } + + bool isAISrc_128F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); + } + + bool isAISrc_128F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); + } + + bool isAISrc_128V2F16() const 
{ + return isAISrc_128F16() || isAISrc_128B32(); + } + + bool isAISrc_512B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); + } + + bool isAISrc_512B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); + } + + bool isAISrc_512V2B16() const { + return isAISrc_512B16(); + } + + bool isAISrc_512F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); + } + + bool isAISrc_512F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); + } + + bool isAISrc_512V2F16() const { + return isAISrc_512F16() || isAISrc_512B32(); + } + + bool isAISrc_1024B32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); + } + + bool isAISrc_1024B16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); + } + + bool isAISrc_1024V2B16() const { + return isAISrc_1024B16(); + } + + bool isAISrc_1024F32() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); + } + + bool isAISrc_1024F16() const { + return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); + } + + bool isAISrc_1024V2F16() const { + return isAISrc_1024F16() || isAISrc_1024B32(); + } + + bool isKImmFP32() const { + return isLiteralImm(MVT::f32); + } + + bool isKImmFP16() const { + return isLiteralImm(MVT::f16); + } + + bool isMem() const override { + return false; + } + + bool isExpr() const { + return Kind == Expression; + } + + bool isSoppBrTarget() const { + return isExpr() || isImm(); + } + + bool isSWaitCnt() const; + bool isHwreg() const; + bool isSendMsg() const; + bool isSwizzle() const; + bool isSMRDOffset8() const; + bool isSMRDOffset20() const; + bool isSMRDLiteralOffset() const; + bool isDPP8() const; + bool isDPPCtrl() const; + bool isBLGP() const; + bool isCBSZ() const; + bool isABID() const; + bool isGPRIdxMode() const; + bool isS16Imm() const; + bool isU16Imm() const; + bool isEndpgm() const; + + StringRef getExpressionAsToken() const { + assert(isExpr()); + const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); + return S->getSymbol().getName(); + } + + StringRef getToken() const { + assert(isToken()); + + if (Kind == Expression) + return getExpressionAsToken(); + + return StringRef(Tok.Data, Tok.Length); + } + + int64_t getImm() const { + assert(isImm()); + return Imm.Val; + } + + ImmTy getImmTy() const { + assert(isImm()); + return Imm.Type; + } + + unsigned getReg() const override { + assert(isRegKind()); + return Reg.RegNo; + } + + SMLoc getStartLoc() const override { + return StartLoc; + } + + SMLoc getEndLoc() const override { + return EndLoc; + } + + SMRange getLocRange() const { + return SMRange(StartLoc, EndLoc); + } + + Modifiers getModifiers() const { + assert(isRegKind() || isImmTy(ImmTyNone)); + return isRegKind() ? 
Reg.Mods : Imm.Mods; + } + + void setModifiers(Modifiers Mods) { + assert(isRegKind() || isImmTy(ImmTyNone)); + if (isRegKind()) + Reg.Mods = Mods; + else + Imm.Mods = Mods; + } + + bool hasModifiers() const { + return getModifiers().hasModifiers(); + } + + bool hasFPModifiers() const { + return getModifiers().hasFPModifiers(); + } + + bool hasIntModifiers() const { + return getModifiers().hasIntModifiers(); + } + + uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; + + void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; + + void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; + + template <unsigned Bitwidth> + void addKImmFPOperands(MCInst &Inst, unsigned N) const; + + void addKImmFP16Operands(MCInst &Inst, unsigned N) const { + addKImmFPOperands<16>(Inst, N); + } + + void addKImmFP32Operands(MCInst &Inst, unsigned N) const { + addKImmFPOperands<32>(Inst, N); + } + + void addRegOperands(MCInst &Inst, unsigned N) const; + + void addBoolRegOperands(MCInst &Inst, unsigned N) const { + addRegOperands(Inst, N); + } + + void addRegOrImmOperands(MCInst &Inst, unsigned N) const { + if (isRegKind()) + addRegOperands(Inst, N); + else if (isExpr()) + Inst.addOperand(MCOperand::createExpr(Expr)); + else + addImmOperands(Inst, N); + } + + void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { + Modifiers Mods = getModifiers(); + Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); + if (isRegKind()) { + addRegOperands(Inst, N); + } else { + addImmOperands(Inst, N, false); + } + } + + void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasIntModifiers()); + addRegOrImmWithInputModsOperands(Inst, N); + } + + void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasFPModifiers()); + addRegOrImmWithInputModsOperands(Inst, N); + } + + void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { + Modifiers Mods = getModifiers(); + Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); + assert(isRegKind()); + addRegOperands(Inst, N); + } + + void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasIntModifiers()); + addRegWithInputModsOperands(Inst, N); + } + + void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { + assert(!hasFPModifiers()); + addRegWithInputModsOperands(Inst, N); + } + + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { + if (isImm()) + addImmOperands(Inst, N); + else { + assert(isExpr()); + Inst.addOperand(MCOperand::createExpr(Expr)); + } + } + + static void printImmTy(raw_ostream& OS, ImmTy Type) { + switch (Type) { + case ImmTyNone: OS << "None"; break; + case ImmTyGDS: OS << "GDS"; break; + case ImmTyLDS: OS << "LDS"; break; + case ImmTyOffen: OS << "Offen"; break; + case ImmTyIdxen: OS << "Idxen"; break; + case ImmTyAddr64: OS << "Addr64"; break; + case ImmTyOffset: OS << "Offset"; break; + case ImmTyInstOffset: OS << "InstOffset"; break; + case ImmTyOffset0: OS << "Offset0"; break; + case ImmTyOffset1: OS << "Offset1"; break; + case ImmTyDLC: OS << "DLC"; break; + case ImmTyGLC: OS << "GLC"; break; + case ImmTySLC: OS << "SLC"; break; + case ImmTyTFE: OS << "TFE"; break; + case ImmTyD16: OS << "D16"; break; + case ImmTyFORMAT: OS << "FORMAT"; break; + case ImmTyClampSI: OS << "ClampSI"; break; + case ImmTyOModSI: OS << "OModSI"; break; + case ImmTyDPP8: OS << "DPP8"; break; + case ImmTyDppCtrl: OS << "DppCtrl"; break; + case ImmTyDppRowMask: OS 
<< "DppRowMask"; break; + case ImmTyDppBankMask: OS << "DppBankMask"; break; + case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; + case ImmTyDppFi: OS << "FI"; break; + case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; + case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; + case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; + case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; + case ImmTyDMask: OS << "DMask"; break; + case ImmTyDim: OS << "Dim"; break; + case ImmTyUNorm: OS << "UNorm"; break; + case ImmTyDA: OS << "DA"; break; + case ImmTyR128A16: OS << "R128A16"; break; + case ImmTyLWE: OS << "LWE"; break; + case ImmTyOff: OS << "Off"; break; + case ImmTyExpTgt: OS << "ExpTgt"; break; + case ImmTyExpCompr: OS << "ExpCompr"; break; + case ImmTyExpVM: OS << "ExpVM"; break; + case ImmTyHwreg: OS << "Hwreg"; break; + case ImmTySendMsg: OS << "SendMsg"; break; + case ImmTyInterpSlot: OS << "InterpSlot"; break; + case ImmTyInterpAttr: OS << "InterpAttr"; break; + case ImmTyAttrChan: OS << "AttrChan"; break; + case ImmTyOpSel: OS << "OpSel"; break; + case ImmTyOpSelHi: OS << "OpSelHi"; break; + case ImmTyNegLo: OS << "NegLo"; break; + case ImmTyNegHi: OS << "NegHi"; break; + case ImmTySwizzle: OS << "Swizzle"; break; + case ImmTyGprIdxMode: OS << "GprIdxMode"; break; + case ImmTyHigh: OS << "High"; break; + case ImmTyBLGP: OS << "BLGP"; break; + case ImmTyCBSZ: OS << "CBSZ"; break; + case ImmTyABID: OS << "ABID"; break; + case ImmTyEndpgm: OS << "Endpgm"; break; + } + } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case Register: + OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; + break; + case Immediate: + OS << '<' << getImm(); + if (getImmTy() != ImmTyNone) { + OS << " type: "; printImmTy(OS, getImmTy()); + } + OS << " mods: " << Imm.Mods << '>'; + break; + case Token: + OS << '\'' << getToken() << '\''; + break; + case Expression: + OS << "<expr " << *Expr << '>'; + break; + } + } + + static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, + int64_t Val, SMLoc Loc, + ImmTy Type = ImmTyNone, + bool IsFPImm = false) { + auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); + Op->Imm.Val = Val; + Op->Imm.IsFPImm = IsFPImm; + Op->Imm.Type = Type; + Op->Imm.Mods = Modifiers(); + Op->StartLoc = Loc; + Op->EndLoc = Loc; + return Op; + } + + static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, + StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { + auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); + Res->Tok.Data = Str.data(); + Res->Tok.Length = Str.size(); + Res->StartLoc = Loc; + Res->EndLoc = Loc; + return Res; + } + + static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, + unsigned RegNo, SMLoc S, + SMLoc E) { + auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); + Op->Reg.RegNo = RegNo; + Op->Reg.Mods = Modifiers(); + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, + const class MCExpr *Expr, SMLoc S) { + auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); + Op->Expr = Expr; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } +}; + +raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { + OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; + return OS; +} + +//===----------------------------------------------------------------------===// +// AsmParser 
+//===----------------------------------------------------------------------===// + +// Holds info related to the current kernel, e.g. count of SGPRs used. +// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next +// .amdgpu_hsa_kernel or at EOF. +class KernelScopeInfo { + int SgprIndexUnusedMin = -1; + int VgprIndexUnusedMin = -1; + MCContext *Ctx = nullptr; + + void usesSgprAt(int i) { + if (i >= SgprIndexUnusedMin) { + SgprIndexUnusedMin = ++i; + if (Ctx) { + MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); + Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); + } + } + } + + void usesVgprAt(int i) { + if (i >= VgprIndexUnusedMin) { + VgprIndexUnusedMin = ++i; + if (Ctx) { + MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); + Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); + } + } + } + +public: + KernelScopeInfo() = default; + + void initialize(MCContext &Context) { + Ctx = &Context; + usesSgprAt(SgprIndexUnusedMin = -1); + usesVgprAt(VgprIndexUnusedMin = -1); + } + + void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { + switch (RegKind) { + case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; + case IS_AGPR: // fall through + case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; + default: break; + } + } +}; + +class AMDGPUAsmParser : public MCTargetAsmParser { + MCAsmParser &Parser; + + // Number of extra operands parsed after the first optional operand. + // This may be necessary to skip hardcoded mandatory operands. + static const unsigned MAX_OPR_LOOKAHEAD = 8; + + unsigned ForcedEncodingSize = 0; + bool ForcedDPP = false; + bool ForcedSDWA = false; + KernelScopeInfo KernelScope; + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "AMDGPUGenAsmMatcher.inc" + + /// } + +private: + bool ParseAsAbsoluteExpression(uint32_t &Ret); + bool OutOfRangeError(SMRange Range); + /// Calculate VGPR/SGPR blocks required for given target, reserved + /// registers, and user-specified NextFreeXGPR values. + /// + /// \param Features [in] Target features, used for bug corrections. + /// \param VCCUsed [in] Whether VCC special SGPR is reserved. + /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. + /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. + /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel + /// descriptor field, if valid. + /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. + /// \param VGPRRange [in] Token range, used for VGPR diagnostics. + /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. + /// \param SGPRRange [in] Token range, used for SGPR diagnostics. + /// \param VGPRBlocks [out] Result VGPR block count. + /// \param SGPRBlocks [out] Result SGPR block count. 
+ bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, + bool FlatScrUsed, bool XNACKUsed, + Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, + SMRange VGPRRange, unsigned NextFreeSGPR, + SMRange SGPRRange, unsigned &VGPRBlocks, + unsigned &SGPRBlocks); + bool ParseDirectiveAMDGCNTarget(); + bool ParseDirectiveAMDHSAKernel(); + bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); + bool ParseDirectiveHSACodeObjectVersion(); + bool ParseDirectiveHSACodeObjectISA(); + bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); + bool ParseDirectiveAMDKernelCodeT(); + bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; + bool ParseDirectiveAMDGPUHsaKernel(); + + bool ParseDirectiveISAVersion(); + bool ParseDirectiveHSAMetadata(); + bool ParseDirectivePALMetadataBegin(); + bool ParseDirectivePALMetadata(); + bool ParseDirectiveAMDGPULDS(); + + /// Common code to parse out a block of text (typically YAML) between start and + /// end directives. + bool ParseToEndDirective(const char *AssemblerDirectiveBegin, + const char *AssemblerDirectiveEnd, + std::string &CollectString); + + bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum); + bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, + unsigned& RegNum, unsigned& RegWidth, + unsigned *DwordRegIndex); + bool isRegister(); + bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; + Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); + void initializeGprCountSymbol(RegisterKind RegKind); + bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, + unsigned RegWidth); + void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); + void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded); + +public: + enum AMDGPUMatchResultTy { + Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY + }; + enum OperandMode { + OperandMode_Default, + OperandMode_NSA, + }; + + using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; + + AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { + MCAsmParserExtension::Initialize(Parser); + + if (getFeatureBits().none()) { + // Set default features. + copySTI().ToggleFeature("southern-islands"); + } + + setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); + + { + // TODO: make those pre-defined variables read-only. + // Currently there is none suitable machinery in the core llvm-mc for this. + // MCSymbol::isRedefinable is intended for another purpose, and + // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + MCContext &Ctx = getContext(); + if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); + } else { + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); + Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); + } + if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + initializeGprCountSymbol(IS_VGPR); + initializeGprCountSymbol(IS_SGPR); + } else + KernelScope.initialize(getContext()); + } + } + + bool hasXNACK() const { + return AMDGPU::hasXNACK(getSTI()); + } + + bool hasMIMG_R128() const { + return AMDGPU::hasMIMG_R128(getSTI()); + } + + bool hasPackedD16() const { + return AMDGPU::hasPackedD16(getSTI()); + } + + bool isSI() const { + return AMDGPU::isSI(getSTI()); + } + + bool isCI() const { + return AMDGPU::isCI(getSTI()); + } + + bool isVI() const { + return AMDGPU::isVI(getSTI()); + } + + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + + bool isGFX10() const { + return AMDGPU::isGFX10(getSTI()); + } + + bool hasInv2PiInlineImm() const { + return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; + } + + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; + } + + bool hasSGPR102_SGPR103() const { + return !isVI() && !isGFX9(); + } + + bool hasSGPR104_SGPR105() const { + return isGFX10(); + } + + bool hasIntClamp() const { + return getFeatureBits()[AMDGPU::FeatureIntClamp]; + } + + AMDGPUTargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<AMDGPUTargetStreamer &>(TS); + } + + const MCRegisterInfo *getMRI() const { + // We need this const_cast because for some reason getContext() is not const + // in MCAsmParser. 
+ return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); + } + + const MCInstrInfo *getMII() const { + return &MII; + } + + const FeatureBitset &getFeatureBits() const { + return getSTI().getFeatureBits(); + } + + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } + void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } + void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } + + unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } + bool isForcedVOP3() const { return ForcedEncodingSize == 64; } + bool isForcedDPP() const { return ForcedDPP; } + bool isForcedSDWA() const { return ForcedSDWA; } + ArrayRef<unsigned> getMatchedVariants() const; + + std::unique_ptr<AMDGPUOperand> parseRegister(); + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseDirective(AsmToken DirectiveID) override; + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, + OperandMode Mode = OperandMode_Default); + StringRef parseMnemonicSuffix(StringRef Name); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + //bool ProcessInstruction(MCInst &Inst); + + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); + + OperandMatchResultTy + parseIntWithPrefix(const char *Prefix, OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + bool (*ConvertResult)(int64_t &) = nullptr); + + OperandMatchResultTy + parseOperandArrayWithPrefix(const char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + bool (*ConvertResult)(int64_t&) = nullptr); + + OperandMatchResultTy + parseNamedBit(const char *Name, OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, + StringRef &Value); + + bool isModifier(); + bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; + bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; + bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; + bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; + bool parseSP3NegModifier(); + OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); + OperandMatchResultTy parseReg(OperandVector &Operands); + OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); + OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); + OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); + OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); + OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); + OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); + OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); + + void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } + void 
cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } + void cvtExp(MCInst &Inst, const OperandVector &Operands); + + bool parseCnt(int64_t &IntVal); + OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + OperandMatchResultTy parseHwreg(OperandVector &Operands); + +private: + struct OperandInfoTy { + int64_t Id; + bool IsSymbolic = false; + bool IsDefined = false; + + OperandInfoTy(int64_t Id_) : Id(Id_) {} + }; + + bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); + bool validateSendMsg(const OperandInfoTy &Msg, + const OperandInfoTy &Op, + const OperandInfoTy &Stream, + const SMLoc Loc); + + bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); + bool validateHwreg(const OperandInfoTy &HwReg, + const int64_t Offset, + const int64_t Width, + const SMLoc Loc); + + void errorExpTgt(); + OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); + SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; + + bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); + bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); + bool validateSOPLiteral(const MCInst &Inst) const; + bool validateConstantBusLimitations(const MCInst &Inst); + bool validateEarlyClobberLimitations(const MCInst &Inst); + bool validateIntClampSupported(const MCInst &Inst); + bool validateMIMGAtomicDMask(const MCInst &Inst); + bool validateMIMGGatherDMask(const MCInst &Inst); + bool validateMIMGDataSize(const MCInst &Inst); + bool validateMIMGAddrSize(const MCInst &Inst); + bool validateMIMGD16(const MCInst &Inst); + bool validateMIMGDim(const MCInst &Inst); + bool validateLdsDirect(const MCInst &Inst); + bool validateOpSel(const MCInst &Inst); + bool validateVccOperand(unsigned Reg) const; + bool validateVOP3Literal(const MCInst &Inst) const; + bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); + bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; + unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; + + bool isId(const StringRef Id) const; + bool isId(const AsmToken &Token, const StringRef Id) const; + bool isToken(const AsmToken::TokenKind Kind) const; + bool trySkipId(const StringRef Id); + bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); + bool trySkipToken(const AsmToken::TokenKind Kind); + bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); + bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); + void peekTokens(MutableArrayRef<AsmToken> Tokens); + AsmToken::TokenKind getTokenKind() const; + bool parseExpr(int64_t &Imm); + StringRef getTokenStr() const; + AsmToken peekToken(); + AsmToken getToken() const; + SMLoc getLoc() const; + void lex(); + +public: + OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); + OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); + + OperandMatchResultTy parseExpTgt(OperandVector &Operands); + OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); + OperandMatchResultTy parseInterpSlot(OperandVector &Operands); + OperandMatchResultTy parseInterpAttr(OperandVector &Operands); + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + OperandMatchResultTy parseBoolReg(OperandVector &Operands); + + bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg); + OperandMatchResultTy 
parseSwizzleOp(OperandVector &Operands); + bool parseSwizzleOffset(int64_t &Imm); + bool parseSwizzleMacro(int64_t &Imm); + bool parseSwizzleQuadPerm(int64_t &Imm); + bool parseSwizzleBitmaskPerm(int64_t &Imm); + bool parseSwizzleBroadcast(int64_t &Imm); + bool parseSwizzleSwap(int64_t &Imm); + bool parseSwizzleReverse(int64_t &Imm); + + OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); + int64_t parseGPRIdxMacro(); + + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } + void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } + void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } + void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } + void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); + + AMDGPUOperand::Ptr defaultDLC() const; + AMDGPUOperand::Ptr defaultGLC() const; + AMDGPUOperand::Ptr defaultSLC() const; + + AMDGPUOperand::Ptr defaultSMRDOffset8() const; + AMDGPUOperand::Ptr defaultSMRDOffset20() const; + AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultFlatOffset() const; + + OperandMatchResultTy parseOModOperand(OperandVector &Operands); + + void cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); + void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); + + void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); + + void cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic = false); + void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); + + OperandMatchResultTy parseDim(OperandVector &Operands); + OperandMatchResultTy parseDPP8(OperandVector &Operands); + OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); + AMDGPUOperand::Ptr defaultRowMask() const; + AMDGPUOperand::Ptr defaultBankMask() const; + AMDGPUOperand::Ptr defaultBoundCtrl() const; + AMDGPUOperand::Ptr defaultFI() const; + void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); + void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } + + OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, + AMDGPUOperand::ImmTy Type); + OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); + void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); + void cvtSDWA(MCInst &Inst, const OperandVector &Operands, + uint64_t BasicInstType, bool skipVcc = false); + + AMDGPUOperand::Ptr defaultBLGP() const; + AMDGPUOperand::Ptr defaultCBSZ() const; + AMDGPUOperand::Ptr defaultABID() const; + + OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); + AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; +}; + +struct OptionalOperand { + const char *Name; + AMDGPUOperand::ImmTy Type; + bool IsBit; + bool (*ConvertResult)(int64_t&); +}; + +} // end anonymous namespace + +// May be called with integer type with equivalent bitwidth. 
+static const fltSemantics *getFltSemantics(unsigned Size) { + switch (Size) { + case 4: + return &APFloat::IEEEsingle(); + case 8: + return &APFloat::IEEEdouble(); + case 2: + return &APFloat::IEEEhalf(); + default: + llvm_unreachable("unsupported fp type"); + } +} + +static const fltSemantics *getFltSemantics(MVT VT) { + return getFltSemantics(VT.getSizeInBits() / 8); +} + +static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { + switch (OperandType) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + return &APFloat::IEEEsingle(); + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + return &APFloat::IEEEdouble(); + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: + return &APFloat::IEEEhalf(); + default: + llvm_unreachable("unsupported fp type"); + } +} + +//===----------------------------------------------------------------------===// +// Operand +//===----------------------------------------------------------------------===// + +static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { + bool Lost; + + // Convert literal to single precision + APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), + APFloat::rmNearestTiesToEven, + &Lost); + // We allow precision lost but not overflow or underflow + if (Status != APFloat::opOK && + Lost && + ((Status & APFloat::opOverflow) != 0 || + (Status & APFloat::opUnderflow) != 0)) { + return false; + } + + return true; +} + +static bool isSafeTruncation(int64_t Val, unsigned Size) { + return isUIntN(Size, Val) || isIntN(Size, Val); +} + +bool AMDGPUOperand::isInlinableImm(MVT type) const { + + // This is a hack to enable named inline values like + // shared_base with both 32-bit and 64-bit operands. + // Note that these values are defined as + // 32-bit operands only. + if (isInlineValue()) { + return true; + } + + if (!isImmTy(ImmTyNone)) { + // Only plain immediates are inlinable (e.g. "clamp" attribute is not) + return false; + } + // TODO: We should avoid using host float here. It would be better to + // check the float bit values which is what a few other places do. + // We've had bot failures before due to weird NaN support on mips hosts. 
+ + APInt Literal(64, Imm.Val); + + if (Imm.IsFPImm) { // We got fp literal token + if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand + return AMDGPU::isInlinableLiteral64(Imm.Val, + AsmParser->hasInv2PiInlineImm()); + } + + APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); + if (!canLosslesslyConvertToFPType(FPLiteral, type)) + return false; + + if (type.getScalarSizeInBits() == 16) { + return AMDGPU::isInlinableLiteral16( + static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), + AsmParser->hasInv2PiInlineImm()); + } + + // Check if single precision literal is inlinable + return AMDGPU::isInlinableLiteral32( + static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), + AsmParser->hasInv2PiInlineImm()); + } + + // We got int literal token. + if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand + return AMDGPU::isInlinableLiteral64(Imm.Val, + AsmParser->hasInv2PiInlineImm()); + } + + if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { + return false; + } + + if (type.getScalarSizeInBits() == 16) { + return AMDGPU::isInlinableLiteral16( + static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), + AsmParser->hasInv2PiInlineImm()); + } + + return AMDGPU::isInlinableLiteral32( + static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), + AsmParser->hasInv2PiInlineImm()); +} + +bool AMDGPUOperand::isLiteralImm(MVT type) const { + // Check that this immediate can be added as literal + if (!isImmTy(ImmTyNone)) { + return false; + } + + if (!Imm.IsFPImm) { + // We got int literal token. + + if (type == MVT::f64 && hasFPModifiers()) { + // Cannot apply fp modifiers to int literals preserving the same semantics + // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, + // disable these cases. + return false; + } + + unsigned Size = type.getSizeInBits(); + if (Size == 64) + Size = 32; + + // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP + // types. + return isSafeTruncation(Imm.Val, Size); + } + + // We got fp literal token + if (type == MVT::f64) { // Expected 64-bit fp operand + // We would set low 64-bits of literal to zeroes but we accept this literals + return true; + } + + if (type == MVT::i64) { // Expected 64-bit int operand + // We don't allow fp literals in 64-bit integer instructions. It is + // unclear how we should encode them. + return false; + } + + // We allow fp literals with f16x2 operands assuming that the specified + // literal goes into the lower half and the upper half is zero. We also + // require that the literal may be losslesly converted to f16. + MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : + (type == MVT::v2i16)? 
MVT::i16 : type; + + APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); + return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); +} + +bool AMDGPUOperand::isRegClass(unsigned RCID) const { + return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); +} + +bool AMDGPUOperand::isSDWAOperand(MVT type) const { + if (AsmParser->isVI()) + return isVReg32(); + else if (AsmParser->isGFX9() || AsmParser->isGFX10()) + return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); + else + return false; +} + +bool AMDGPUOperand::isSDWAFP16Operand() const { + return isSDWAOperand(MVT::f16); +} + +bool AMDGPUOperand::isSDWAFP32Operand() const { + return isSDWAOperand(MVT::f32); +} + +bool AMDGPUOperand::isSDWAInt16Operand() const { + return isSDWAOperand(MVT::i16); +} + +bool AMDGPUOperand::isSDWAInt32Operand() const { + return isSDWAOperand(MVT::i32); +} + +bool AMDGPUOperand::isBoolReg() const { + return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? + isSCSrcB64() : isSCSrcB32(); +} + +uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const +{ + assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); + assert(Size == 2 || Size == 4 || Size == 8); + + const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); + + if (Imm.Mods.Abs) { + Val &= ~FpSignMask; + } + if (Imm.Mods.Neg) { + Val ^= FpSignMask; + } + + return Val; +} + +void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { + if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), + Inst.getNumOperands())) { + addLiteralImmOperand(Inst, Imm.Val, + ApplyModifiers & + isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); + } else { + assert(!isImmTy(ImmTyNone) || !hasModifiers()); + Inst.addOperand(MCOperand::createImm(Imm.Val)); + } +} + +void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { + const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); + auto OpNum = Inst.getNumOperands(); + // Check that this operand accepts literals + assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); + + if (ApplyModifiers) { + assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); + const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); + Val = applyInputFPModifiers(Val, Size); + } + + APInt Literal(64, Val); + uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; + + if (Imm.IsFPImm) { // We got fp literal token + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), + AsmParser->hasInv2PiInlineImm())) { + Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); + return; + } + + // Non-inlineable + if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand + // For fp operands we check if low 32 bits are zeros + if (Literal.getLoBits(32) != 0) { + const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), + "Can't encode literal as exact 64-bit floating-point operand. " + "Low 32-bits will be set to zero"); + } + + Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); + return; + } + + // We don't allow fp literals in 64-bit integer instructions. It is + // unclear how we should encode them. 
This case should be checked earlier + // in predicate methods (isLiteralImm()) + llvm_unreachable("fp literal in 64-bit integer instruction."); + + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: { + bool lost; + APFloat FPLiteral(APFloat::IEEEdouble(), Literal); + // Convert literal to single precision + FPLiteral.convert(*getOpFltSemantics(OpTy), + APFloat::rmNearestTiesToEven, &lost); + // We allow precision lost but not overflow or underflow. This should be + // checked earlier in isLiteralImm() + + uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); + Inst.addOperand(MCOperand::createImm(ImmVal)); + return; + } + default: + llvm_unreachable("invalid operand size"); + } + + return; + } + + // We got int literal token. + // Only sign extend inline immediates. + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: + if (isSafeTruncation(Val, 32) && + AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), + AsmParser->hasInv2PiInlineImm())) { + Inst.addOperand(MCOperand::createImm(Val)); + return; + } + + Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); + return; + + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { + Inst.addOperand(MCOperand::createImm(Val)); + return; + } + + Inst.addOperand(MCOperand::createImm(Lo_32(Val))); + return; + + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + if (isSafeTruncation(Val, 16) && + AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), + AsmParser->hasInv2PiInlineImm())) { + Inst.addOperand(MCOperand::createImm(Val)); + return; + } + + Inst.addOperand(MCOperand::createImm(Val & 0xffff)); + return; + + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { + assert(isSafeTruncation(Val, 16)); + assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), + AsmParser->hasInv2PiInlineImm())); + + Inst.addOperand(MCOperand::createImm(Val)); + return; + } + default: + llvm_unreachable("invalid operand size"); + } +} + +template <unsigned Bitwidth> +void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { + APInt Literal(64, Imm.Val); + + if 
(!Imm.IsFPImm) { + // We got int literal token. + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); + return; + } + + bool Lost; + APFloat FPLiteral(APFloat::IEEEdouble(), Literal); + FPLiteral.convert(*getFltSemantics(Bitwidth / 8), + APFloat::rmNearestTiesToEven, &Lost); + Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); +} + +void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); +} + +static bool isInlineValue(unsigned Reg) { + switch (Reg) { + case AMDGPU::SRC_SHARED_BASE: + case AMDGPU::SRC_SHARED_LIMIT: + case AMDGPU::SRC_PRIVATE_BASE: + case AMDGPU::SRC_PRIVATE_LIMIT: + case AMDGPU::SRC_POPS_EXITING_WAVE_ID: + return true; + case AMDGPU::SRC_VCCZ: + case AMDGPU::SRC_EXECZ: + case AMDGPU::SRC_SCC: + return true; + default: + return false; + } +} + +bool AMDGPUOperand::isInlineValue() const { + return isRegKind() && ::isInlineValue(getReg()); +} + +//===----------------------------------------------------------------------===// +// AsmParser +//===----------------------------------------------------------------------===// + +static int getRegClass(RegisterKind Is, unsigned RegWidth) { + if (Is == IS_VGPR) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::VGPR_32RegClassID; + case 2: return AMDGPU::VReg_64RegClassID; + case 3: return AMDGPU::VReg_96RegClassID; + case 4: return AMDGPU::VReg_128RegClassID; + case 8: return AMDGPU::VReg_256RegClassID; + case 16: return AMDGPU::VReg_512RegClassID; + } + } else if (Is == IS_TTMP) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::TTMP_32RegClassID; + case 2: return AMDGPU::TTMP_64RegClassID; + case 4: return AMDGPU::TTMP_128RegClassID; + case 8: return AMDGPU::TTMP_256RegClassID; + case 16: return AMDGPU::TTMP_512RegClassID; + } + } else if (Is == IS_SGPR) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::SGPR_32RegClassID; + case 2: return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SGPR_128RegClassID; + case 8: return AMDGPU::SGPR_256RegClassID; + case 16: return AMDGPU::SGPR_512RegClassID; + } + } else if (Is == IS_AGPR) { + switch (RegWidth) { + default: return -1; + case 1: return AMDGPU::AGPR_32RegClassID; + case 2: return AMDGPU::AReg_64RegClassID; + case 4: return AMDGPU::AReg_128RegClassID; + case 16: return AMDGPU::AReg_512RegClassID; + case 32: return AMDGPU::AReg_1024RegClassID; + } + } + return -1; +} + +static unsigned getSpecialRegForName(StringRef RegName) { + return StringSwitch<unsigned>(RegName) + .Case("exec", AMDGPU::EXEC) + .Case("vcc", AMDGPU::VCC) + .Case("flat_scratch", AMDGPU::FLAT_SCR) + .Case("xnack_mask", AMDGPU::XNACK_MASK) + .Case("shared_base", AMDGPU::SRC_SHARED_BASE) + .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) + .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) + .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) + .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) + .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) + .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) + .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) + .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) + .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) + .Case("lds_direct", AMDGPU::LDS_DIRECT) + .Case("src_lds_direct", AMDGPU::LDS_DIRECT) + .Case("m0", AMDGPU::M0) + .Case("vccz", AMDGPU::SRC_VCCZ) + .Case("src_vccz", AMDGPU::SRC_VCCZ) + .Case("execz", 
AMDGPU::SRC_EXECZ) + .Case("src_execz", AMDGPU::SRC_EXECZ) + .Case("scc", AMDGPU::SRC_SCC) + .Case("src_scc", AMDGPU::SRC_SCC) + .Case("tba", AMDGPU::TBA) + .Case("tma", AMDGPU::TMA) + .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) + .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) + .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) + .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) + .Case("vcc_lo", AMDGPU::VCC_LO) + .Case("vcc_hi", AMDGPU::VCC_HI) + .Case("exec_lo", AMDGPU::EXEC_LO) + .Case("exec_hi", AMDGPU::EXEC_HI) + .Case("tma_lo", AMDGPU::TMA_LO) + .Case("tma_hi", AMDGPU::TMA_HI) + .Case("tba_lo", AMDGPU::TBA_LO) + .Case("tba_hi", AMDGPU::TBA_HI) + .Case("null", AMDGPU::SGPR_NULL) + .Default(0); +} + +bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + auto R = parseRegister(); + if (!R) return true; + assert(R->isReg()); + RegNo = R->getReg(); + StartLoc = R->getStartLoc(); + EndLoc = R->getEndLoc(); + return false; +} + +bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum) { + switch (RegKind) { + case IS_SPECIAL: + if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { + Reg = AMDGPU::EXEC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { + Reg = AMDGPU::FLAT_SCR; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { + Reg = AMDGPU::XNACK_MASK; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { + Reg = AMDGPU::VCC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { + Reg = AMDGPU::TBA; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { + Reg = AMDGPU::TMA; + RegWidth = 2; + return true; + } + return false; + case IS_VGPR: + case IS_SGPR: + case IS_AGPR: + case IS_TTMP: + if (Reg1 != Reg + RegWidth) { + return false; + } + RegWidth++; + return true; + default: + llvm_unreachable("unexpected register kind"); + } +} + +static const StringRef Registers[] = { + { "v" }, + { "s" }, + { "ttmp" }, + { "acc" }, + { "a" }, +}; + +bool +AMDGPUAsmParser::isRegister(const AsmToken &Token, + const AsmToken &NextToken) const { + + // A list of consecutive registers: [s0,s1,s2,s3] + if (Token.is(AsmToken::LBrac)) + return true; + + if (!Token.is(AsmToken::Identifier)) + return false; + + // A single register like s0 or a range of registers like s[0:1] + + StringRef RegName = Token.getString(); + + for (StringRef Reg : Registers) { + if (RegName.startswith(Reg)) { + if (Reg.size() < RegName.size()) { + unsigned RegNum; + // A single register with an index: rXX + if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) + return true; + } else { + // A range of registers: r[XX:YY]. 
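+        // [Editor's note] For example, "s" or "v" immediately followed by '['
+        // (as in s[4:7] or v[0:1]) is classified as a register here; the range
+        // contents themselves are validated later by ParseAMDGPURegister().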
+ if (NextToken.is(AsmToken::LBrac)) + return true; + } + } + } + + return getSpecialRegForName(RegName); +} + +bool +AMDGPUAsmParser::isRegister() +{ + return isRegister(getToken(), peekToken()); +} + +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, + unsigned &RegNum, unsigned &RegWidth, + unsigned *DwordRegIndex) { + if (DwordRegIndex) { *DwordRegIndex = 0; } + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + if (getLexer().is(AsmToken::Identifier)) { + StringRef RegName = Parser.getTok().getString(); + if ((Reg = getSpecialRegForName(RegName))) { + Parser.Lex(); + RegKind = IS_SPECIAL; + } else { + unsigned RegNumIndex = 0; + if (RegName[0] == 'v') { + RegNumIndex = 1; + RegKind = IS_VGPR; + } else if (RegName[0] == 's') { + RegNumIndex = 1; + RegKind = IS_SGPR; + } else if (RegName[0] == 'a') { + RegNumIndex = RegName.startswith("acc") ? 3 : 1; + RegKind = IS_AGPR; + } else if (RegName.startswith("ttmp")) { + RegNumIndex = strlen("ttmp"); + RegKind = IS_TTMP; + } else { + return false; + } + if (RegName.size() > RegNumIndex) { + // Single 32-bit register: vXX. + if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) + return false; + Parser.Lex(); + RegWidth = 1; + } else { + // Range of registers: v[XX:YY]. ":YY" is optional. + Parser.Lex(); + int64_t RegLo, RegHi; + if (getLexer().isNot(AsmToken::LBrac)) + return false; + Parser.Lex(); + + if (getParser().parseAbsoluteExpression(RegLo)) + return false; + + const bool isRBrace = getLexer().is(AsmToken::RBrac); + if (!isRBrace && getLexer().isNot(AsmToken::Colon)) + return false; + Parser.Lex(); + + if (isRBrace) { + RegHi = RegLo; + } else { + if (getParser().parseAbsoluteExpression(RegHi)) + return false; + + if (getLexer().isNot(AsmToken::RBrac)) + return false; + Parser.Lex(); + } + RegNum = (unsigned) RegLo; + RegWidth = (RegHi - RegLo) + 1; + } + } + } else if (getLexer().is(AsmToken::LBrac)) { + // List of consecutive registers: [s0,s1,s2,s3] + Parser.Lex(); + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) + return false; + if (RegWidth != 1) + return false; + RegisterKind RegKind1; + unsigned Reg1, RegNum1, RegWidth1; + do { + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + } else if (getLexer().is(AsmToken::RBrac)) { + Parser.Lex(); + break; + } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { + if (RegWidth1 != 1) { + return false; + } + if (RegKind1 != RegKind) { + return false; + } + if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { + return false; + } + } else { + return false; + } + } while (true); + } else { + return false; + } + switch (RegKind) { + case IS_SPECIAL: + RegNum = 0; + RegWidth = 1; + break; + case IS_VGPR: + case IS_SGPR: + case IS_AGPR: + case IS_TTMP: + { + unsigned Size = 1; + if (RegKind == IS_SGPR || RegKind == IS_TTMP) { + // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
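+      // [Editor's note: illustrative sketch, not part of the original patch.]
+      // The first dword index of the group must be a multiple of the group
+      // width, capped at 4 dwords. A minimal standalone model of that rule
+      // (hypothetical helper name, standard C++ only):
+#if 0
+#include <algorithm>
+#include <cassert>
+
+static bool isAlignedScalarRange(unsigned FirstDword, unsigned WidthInDwords) {
+  unsigned Align = std::min(WidthInDwords, 4u); // max required alignment
+  return FirstDword % Align == 0;
+}
+
+static void alignmentExamples() {
+  assert(isAlignedScalarRange(4, 4));  // s[4:7]  -> accepted
+  assert(!isAlignedScalarRange(3, 4)); // s[3:6]  -> rejected
+  assert(isAlignedScalarRange(2, 2));  // s[2:3]  -> accepted
+  assert(isAlignedScalarRange(8, 8));  // s[8:15] -> accepted (alignment caps at 4)
+}
+#endif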
+ Size = std::min(RegWidth, 4u); + } + if (RegNum % Size != 0) + return false; + if (DwordRegIndex) { *DwordRegIndex = RegNum; } + RegNum = RegNum / Size; + int RCID = getRegClass(RegKind, RegWidth); + if (RCID == -1) + return false; + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegNum >= RC.getNumRegs()) + return false; + Reg = RC.getRegister(RegNum); + break; + } + + default: + llvm_unreachable("unexpected register kind"); + } + + if (!subtargetHasRegister(*TRI, Reg)) + return false; + return true; +} + +Optional<StringRef> +AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { + switch (RegKind) { + case IS_VGPR: + return StringRef(".amdgcn.next_free_vgpr"); + case IS_SGPR: + return StringRef(".amdgcn.next_free_sgpr"); + default: + return None; + } +} + +void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { + auto SymbolName = getGprCountSymbolName(RegKind); + assert(SymbolName && "initializing invalid register kind"); + MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); + Sym->setVariableValue(MCConstantExpr::create(0, getContext())); +} + +bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, + unsigned DwordRegIndex, + unsigned RegWidth) { + // Symbols are only defined for GCN targets + if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) + return true; + + auto SymbolName = getGprCountSymbolName(RegKind); + if (!SymbolName) + return true; + MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); + + int64_t NewMax = DwordRegIndex + RegWidth - 1; + int64_t OldCount; + + if (!Sym->isVariable()) + return !Error(getParser().getTok().getLoc(), + ".amdgcn.next_free_{v,s}gpr symbols must be variable"); + if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) + return !Error( + getParser().getTok().getLoc(), + ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); + + if (OldCount <= NewMax) + Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); + + return true; +} + +std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { + const auto &Tok = Parser.getTok(); + SMLoc StartLoc = Tok.getLoc(); + SMLoc EndLoc = Tok.getEndLoc(); + RegisterKind RegKind; + unsigned Reg, RegNum, RegWidth, DwordRegIndex; + + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { + //FIXME: improve error messages (bug 41303). + Error(StartLoc, "not a valid operand."); + return nullptr; + } + if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) + return nullptr; + } else + KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); + return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); +} + +OperandMatchResultTy +AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { + // TODO: add syntactic sugar for 1/(2*PI) + + assert(!isRegister()); + assert(!isModifier()); + + const auto& Tok = getToken(); + const auto& NextTok = peekToken(); + bool IsReal = Tok.is(AsmToken::Real); + SMLoc S = getLoc(); + bool Negate = false; + + if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { + lex(); + IsReal = true; + Negate = true; + } + + if (IsReal) { + // Floating-point expressions are not supported. + // Can only allow floating-point literals with an + // optional sign. 
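+    // [Editor's note: illustrative sketch, not part of the original patch.]
+    // The literal text below is parsed into a double and the operand keeps
+    // the raw IEEE-754 bit pattern; a leading '-' only flips the sign of the
+    // parsed value. A rough standalone approximation (std::stod in place of
+    // APFloat, hypothetical helper name):
+#if 0
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+static uint64_t fpLiteralBits(const std::string &Tok, bool Negate) {
+  double V = std::stod(Tok);
+  if (Negate)
+    V = -V;
+  uint64_t Bits;
+  std::memcpy(&Bits, &V, sizeof(Bits));
+  return Bits; // "1.0" -> 0x3FF0000000000000, with Negate -> 0xBFF0000000000000
+}
+#endif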
+ + StringRef Num = getTokenStr(); + lex(); + + APFloat RealVal(APFloat::IEEEdouble()); + auto roundMode = APFloat::rmNearestTiesToEven; + if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { + return MatchOperand_ParseFail; + } + if (Negate) + RealVal.changeSign(); + + Operands.push_back( + AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, + AMDGPUOperand::ImmTyNone, true)); + + return MatchOperand_Success; + + } else { + int64_t IntVal; + const MCExpr *Expr; + SMLoc S = getLoc(); + + if (HasSP3AbsModifier) { + // This is a workaround for handling expressions + // as arguments of SP3 'abs' modifier, for example: + // |1.0| + // |-1| + // |1+x| + // This syntax is not compatible with syntax of standard + // MC expressions (due to the trailing '|'). + SMLoc EndLoc; + if (getParser().parsePrimaryExpr(Expr, EndLoc)) + return MatchOperand_ParseFail; + } else { + if (Parser.parseExpression(Expr)) + return MatchOperand_ParseFail; + } + + if (Expr->evaluateAsAbsolute(IntVal)) { + Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); + } else { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + } + + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseReg(OperandVector &Operands) { + if (!isRegister()) + return MatchOperand_NoMatch; + + if (auto R = parseRegister()) { + assert(R->isReg()); + Operands.push_back(std::move(R)); + return MatchOperand_Success; + } + return MatchOperand_ParseFail; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { + auto res = parseReg(Operands); + if (res != MatchOperand_NoMatch) { + return res; + } else if (isModifier()) { + return MatchOperand_NoMatch; + } else { + return parseImm(Operands, HasSP3AbsMod); + } +} + +bool +AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { + if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { + const auto &str = Token.getString(); + return str == "abs" || str == "neg" || str == "sext"; + } + return false; +} + +bool +AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { + return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); +} + +bool +AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { + return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); +} + +bool +AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { + return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); +} + +// Check if this is an operand modifier or an opcode modifier +// which may look like an expression but it is not. We should +// avoid parsing these modifiers as expressions. Currently +// recognized sequences are: +// |...| +// abs(...) +// neg(...) +// sext(...) +// -reg +// -|...| +// -abs(...) +// name:... +// Note that simple opcode modifiers like 'gds' may be parsed as +// expressions; this is a special case. See getExpressionAsToken. +// +bool +AMDGPUAsmParser::isModifier() { + + AsmToken Tok = getToken(); + AsmToken NextToken[2]; + peekTokens(NextToken); + + return isOperandModifier(Tok, NextToken[0]) || + (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || + isOpcodeModifierWithVal(Tok, NextToken[0]); +} + +// Check if the current token is an SP3 'neg' modifier. 
+// Currently this modifier is allowed in the following context: +// +// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". +// 2. Before an 'abs' modifier: -abs(...) +// 3. Before an SP3 'abs' modifier: -|...| +// +// In all other cases "-" is handled as a part +// of an expression that follows the sign. +// +// Note: When "-" is followed by an integer literal, +// this is interpreted as integer negation rather +// than a floating-point NEG modifier applied to N. +// Beside being contr-intuitive, such use of floating-point +// NEG modifier would have resulted in different meaning +// of integer literals used with VOP1/2/C and VOP3, +// for example: +// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF +// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 +// Negative fp literals with preceding "-" are +// handled likewise for unifomtity +// +bool +AMDGPUAsmParser::parseSP3NegModifier() { + + AsmToken NextToken[2]; + peekTokens(NextToken); + + if (isToken(AsmToken::Minus) && + (isRegister(NextToken[0], NextToken[1]) || + NextToken[0].is(AsmToken::Pipe) || + isId(NextToken[0], "abs"))) { + lex(); + return true; + } + + return false; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, + bool AllowImm) { + bool Neg, SP3Neg; + bool Abs, SP3Abs; + SMLoc Loc; + + // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. + if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { + Error(getLoc(), "invalid syntax, expected 'neg' modifier"); + return MatchOperand_ParseFail; + } + + SP3Neg = parseSP3NegModifier(); + + Loc = getLoc(); + Neg = trySkipId("neg"); + if (Neg && SP3Neg) { + Error(Loc, "expected register or immediate"); + return MatchOperand_ParseFail; + } + if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) + return MatchOperand_ParseFail; + + Abs = trySkipId("abs"); + if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) + return MatchOperand_ParseFail; + + Loc = getLoc(); + SP3Abs = trySkipToken(AsmToken::Pipe); + if (Abs && SP3Abs) { + Error(Loc, "expected register or immediate"); + return MatchOperand_ParseFail; + } + + OperandMatchResultTy Res; + if (AllowImm) { + Res = parseRegOrImm(Operands, SP3Abs); + } else { + Res = parseReg(Operands); + } + if (Res != MatchOperand_Success) { + return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; + } + + if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) + return MatchOperand_ParseFail; + if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) + return MatchOperand_ParseFail; + if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) + return MatchOperand_ParseFail; + + AMDGPUOperand::Modifiers Mods; + Mods.Abs = Abs || SP3Abs; + Mods.Neg = Neg || SP3Neg; + + if (Mods.hasFPModifiers()) { + AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); + if (Op.isExpr()) { + Error(Op.getStartLoc(), "expected an absolute expression"); + return MatchOperand_ParseFail; + } + Op.setModifiers(Mods); + } + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, + bool AllowImm) { + bool Sext = trySkipId("sext"); + if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) + return MatchOperand_ParseFail; + + OperandMatchResultTy Res; + if (AllowImm) { + Res = parseRegOrImm(Operands); + } else { + Res = parseReg(Operands); + } + if (Res != MatchOperand_Success) { + return Sext? MatchOperand_ParseFail : Res; + } + + if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) + return MatchOperand_ParseFail; + + AMDGPUOperand::Modifiers Mods; + Mods.Sext = Sext; + + if (Mods.hasIntModifiers()) { + AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); + if (Op.isExpr()) { + Error(Op.getStartLoc(), "expected an absolute expression"); + return MatchOperand_ParseFail; + } + Op.setModifiers(Mods); + } + + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { + return parseRegOrImmWithFPInputMods(Operands, false); +} + +OperandMatchResultTy +AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { + return parseRegOrImmWithIntInputMods(Operands, false); +} + +OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { + auto Loc = getLoc(); + if (trySkipId("off")) { + Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, + AMDGPUOperand::ImmTyOff, false)); + return MatchOperand_Success; + } + + if (!isRegister()) + return MatchOperand_NoMatch; + + std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); + if (Reg) { + Operands.push_back(std::move(Reg)); + return MatchOperand_Success; + } + + return MatchOperand_ParseFail; + +} + +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + + if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || + (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || + (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) + return Match_InvalidOperand; + + if ((TSFlags & SIInstrFlags::VOP3) && + (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && + getForcedEncodingSize() != 64) + return Match_PreferE32; + + if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || + Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { + // v_mac_f32/16 allow only dst_sel == DWORD; + auto OpNum = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); + const auto &Op = Inst.getOperand(OpNum); + if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { + return Match_InvalidOperand; + } + } + + return Match_Success; +} + +// What asm variants we should check +ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() 
const { + if (getForcedEncodingSize() == 32) { + static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; + return makeArrayRef(Variants); + } + + if (isForcedVOP3()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; + return makeArrayRef(Variants); + } + + if (isForcedSDWA()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, + AMDGPUAsmVariants::SDWA9}; + return makeArrayRef(Variants); + } + + if (isForcedDPP()) { + static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; + return makeArrayRef(Variants); + } + + static const unsigned Variants[] = { + AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP + }; + + return makeArrayRef(Variants); +} + +unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + const unsigned Num = Desc.getNumImplicitUses(); + for (unsigned i = 0; i < Num; ++i) { + unsigned Reg = Desc.ImplicitUses[i]; + switch (Reg) { + case AMDGPU::FLAT_SCR: + case AMDGPU::VCC: + case AMDGPU::VCC_LO: + case AMDGPU::VCC_HI: + case AMDGPU::M0: + case AMDGPU::SGPR_NULL: + return Reg; + default: + break; + } + } + return AMDGPU::NoRegister; +} + +// NB: This code is correct only when used to check constant +// bus limitations because GFX7 support no f16 inline constants. +// Note that there are no cases when a GFX7 opcode violates +// constant bus limitations due to the use of an f16 constant. +bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, + unsigned OpIdx) const { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + + if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { + return false; + } + + const MCOperand &MO = Inst.getOperand(OpIdx); + + int64_t Val = MO.getImm(); + auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); + + switch (OpSize) { // expected operand size + case 8: + return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); + case 4: + return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); + case 2: { + const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; + if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { + return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); + } else { + return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); + } + } + default: + llvm_unreachable("invalid operand size"); + } +} + +bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { + const MCOperand &MO = Inst.getOperand(OpIdx); + if (MO.isImm()) { + return !isInlineConstant(Inst, OpIdx); + } + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); +} + +bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + unsigned ConstantBusUseCount = 0; + unsigned NumLiterals = 0; + unsigned LiteralSize; + + if (Desc.TSFlags & + (SIInstrFlags::VOPC | + SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { + // Check special imm operands (used by madmk, etc) + if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { + ++ConstantBusUseCount; + } + + 
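+    // [Editor's note: illustrative sketch, not part of the original patch.]
+    // The loop below counts constant-bus users: each *distinct* scalar
+    // register read costs one slot, and literal operands of the same size
+    // collapse into a single slot (two slots if the sizes differ). The total
+    // must not exceed 1 (or 2 on GFX10). A simplified standalone model with
+    // hypothetical types, ignoring implicit reads and inline constants:
+#if 0
+#include <set>
+#include <vector>
+
+struct SrcUse {
+  bool IsSgpr;       // scalar register read
+  unsigned Reg;      // which scalar register
+  bool IsLiteral;    // non-inline literal constant
+  unsigned Size;     // operand size in bytes
+};
+
+static unsigned countConstantBusUses(const std::vector<SrcUse> &Srcs) {
+  std::set<unsigned> SgprsSeen;
+  unsigned Count = 0, NumLiterals = 0, LiteralSize = 0;
+  for (const SrcUse &S : Srcs) {
+    if (S.IsSgpr) {
+      if (SgprsSeen.insert(S.Reg).second)
+        ++Count;                     // repeated uses of the same SGPR are free
+    } else if (S.IsLiteral) {
+      unsigned Sz = S.Size < 4 ? 4 : S.Size;
+      if (NumLiterals == 0) {
+        NumLiterals = 1;
+        LiteralSize = Sz;
+      } else if (LiteralSize != Sz) {
+        NumLiterals = 2;
+      }
+    }
+  }
+  return Count + NumLiterals;        // compare against 1, or 2 on GFX10
+}
+#endif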
SmallDenseSet<unsigned> SGPRsUsed; + unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); + if (SGPRUsed != AMDGPU::NoRegister) { + SGPRsUsed.insert(SGPRUsed); + ++ConstantBusUseCount; + } + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (usesConstantBus(Inst, OpIdx)) { + if (MO.isReg()) { + const unsigned Reg = mc2PseudoReg(MO.getReg()); + // Pairs of registers with a partial intersections like these + // s0, s[0:1] + // flat_scratch_lo, flat_scratch + // flat_scratch_lo, flat_scratch_hi + // are theoretically valid but they are disabled anyway. + // Note that this code mimics SIInstrInfo::verifyInstruction + if (!SGPRsUsed.count(Reg)) { + SGPRsUsed.insert(Reg); + ++ConstantBusUseCount; + } + } else { // Expression or a literal + + if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) + continue; // special operand like VINTERP attr_chan + + // An instruction may use only one literal. + // This has been validated on the previous step. + // See validateVOP3Literal. + // This literal may be used as more than one operand. + // If all these operands are of the same size, + // this literal counts as one scalar value. + // Otherwise it counts as 2 scalar values. + // See "GFX10 Shader Programming", section 3.6.2.3. + + unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); + if (Size < 4) Size = 4; + + if (NumLiterals == 0) { + NumLiterals = 1; + LiteralSize = Size; + } else if (LiteralSize != Size) { + NumLiterals = 2; + } + } + } + } + } + ConstantBusUseCount += NumLiterals; + + if (isGFX10()) + return ConstantBusUseCount <= 2; + + return ConstantBusUseCount <= 1; +} + +bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + + const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); + if (DstIdx == -1 || + Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { + return true; + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + assert(DstIdx != -1); + const MCOperand &Dst = Inst.getOperand(DstIdx); + assert(Dst.isReg()); + const unsigned DstReg = mc2PseudoReg(Dst.getReg()); + + const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int SrcIdx : SrcIndices) { + if (SrcIdx == -1) break; + const MCOperand &Src = Inst.getOperand(SrcIdx); + if (Src.isReg()) { + const unsigned SrcReg = mc2PseudoReg(Src.getReg()); + if (isRegIntersect(DstReg, SrcReg, TRI)) { + return false; + } + } + } + + return true; +} + +bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { + int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); + assert(ClampIdx != -1); + return Inst.getOperand(ClampIdx).getImm() == 0; + } + + return true; +} + +bool 
AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) + return true; + + int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); + int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); + int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); + + assert(VDataIdx != -1); + assert(DMaskIdx != -1); + assert(TFEIdx != -1); + + unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); + unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; + unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; + if (DMask == 0) + DMask = 1; + + unsigned DataSize = + (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); + if (hasPackedD16()) { + int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); + if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) + DataSize = (DataSize + 1) / 2; + } + + return (VDataSize / 4) == DataSize + TFESize; +} + +bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) + return true; + + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); + int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); + int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); + + assert(VAddr0Idx != -1); + assert(SrsrcIdx != -1); + assert(DimIdx != -1); + assert(SrsrcIdx > VAddr0Idx); + + unsigned Dim = Inst.getOperand(DimIdx).getImm(); + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); + bool IsNSA = SrsrcIdx - VAddr0Idx > 1; + unsigned VAddrSize = + IsNSA ? SrsrcIdx - VAddr0Idx + : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; + + unsigned AddrSize = BaseOpcode->NumExtraArgs + + (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + + (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + if (!IsNSA) { + if (AddrSize > 8) + AddrSize = 16; + else if (AddrSize > 4) + AddrSize = 8; + } + + return VAddrSize == AddrSize; +} + +bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) + return true; + if (!Desc.mayLoad() || !Desc.mayStore()) + return true; // Not atomic + + int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); + unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; + + // This is an incomplete check because image_atomic_cmpswap + // may only use 0x3 and 0xf while other atomic operations + // may use 0x1 and 0x3. However these limitations are + // verified when we check that dmask matches dst size. 
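+  // [Editor's note] Concretely, the masks accepted below cover 1, 2 or 4
+  // consecutive dwords of returned data, e.g. dmask:0x1 for a 32-bit
+  // image_atomic_add and dmask:0x3 for image_atomic_cmpswap; anything else,
+  // such as a non-contiguous dmask:0x5, is rejected here.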
+ return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; +} + +bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) + return true; + + int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); + unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; + + // GATHER4 instructions use dmask in a different fashion compared to + // other MIMG instructions. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. + return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; +} + +bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) + return true; + + int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); + if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { + if (isCI() || isSI()) + return false; + } + + return true; +} + +bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) + return true; + + int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); + if (DimIdx < 0) + return true; + + long Imm = Inst.getOperand(DimIdx).getImm(); + if (Imm < 0 || Imm >= 8) + return false; + + return true; +} + +static bool IsRevOpcode(const unsigned Opcode) +{ + switch (Opcode) { + case AMDGPU::V_SUBREV_F32_e32: + case AMDGPU::V_SUBREV_F32_e64: + case AMDGPU::V_SUBREV_F32_e32_gfx10: + case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: + case AMDGPU::V_SUBREV_F32_e32_vi: + case AMDGPU::V_SUBREV_F32_e64_gfx10: + case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: + case AMDGPU::V_SUBREV_F32_e64_vi: + + case AMDGPU::V_SUBREV_I32_e32: + case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: + case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: + + case AMDGPU::V_SUBBREV_U32_e32: + case AMDGPU::V_SUBBREV_U32_e64: + case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: + case AMDGPU::V_SUBBREV_U32_e32_vi: + case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: + case AMDGPU::V_SUBBREV_U32_e64_vi: + + case AMDGPU::V_SUBREV_U32_e32: + case AMDGPU::V_SUBREV_U32_e64: + case AMDGPU::V_SUBREV_U32_e32_gfx9: + case AMDGPU::V_SUBREV_U32_e32_vi: + case AMDGPU::V_SUBREV_U32_e64_gfx9: + case AMDGPU::V_SUBREV_U32_e64_vi: + + case AMDGPU::V_SUBREV_F16_e32: + case AMDGPU::V_SUBREV_F16_e64: + case AMDGPU::V_SUBREV_F16_e32_gfx10: + case AMDGPU::V_SUBREV_F16_e32_vi: + case AMDGPU::V_SUBREV_F16_e64_gfx10: + case AMDGPU::V_SUBREV_F16_e64_vi: + + case AMDGPU::V_SUBREV_U16_e32: + case AMDGPU::V_SUBREV_U16_e64: + case AMDGPU::V_SUBREV_U16_e32_vi: + case AMDGPU::V_SUBREV_U16_e64_vi: + + case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: + case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: + + case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: + + case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: + case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: + + case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: + + case AMDGPU::V_LSHRREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: + case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: + case AMDGPU::V_LSHRREV_B32_e32_vi: + case AMDGPU::V_LSHRREV_B32_e64_vi: + case 
AMDGPU::V_LSHRREV_B32_e32_gfx10: + case AMDGPU::V_LSHRREV_B32_e64_gfx10: + + case AMDGPU::V_ASHRREV_I32_e32: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_ASHRREV_I32_e32_gfx10: + case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: + case AMDGPU::V_ASHRREV_I32_e32_vi: + case AMDGPU::V_ASHRREV_I32_e64_gfx10: + case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: + case AMDGPU::V_ASHRREV_I32_e64_vi: + + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHLREV_B32_e64: + case AMDGPU::V_LSHLREV_B32_e32_gfx10: + case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: + case AMDGPU::V_LSHLREV_B32_e32_vi: + case AMDGPU::V_LSHLREV_B32_e64_gfx10: + case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: + case AMDGPU::V_LSHLREV_B32_e64_vi: + + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHLREV_B16_e64: + case AMDGPU::V_LSHLREV_B16_e32_vi: + case AMDGPU::V_LSHLREV_B16_e64_vi: + case AMDGPU::V_LSHLREV_B16_gfx10: + + case AMDGPU::V_LSHRREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_LSHRREV_B16_e32_vi: + case AMDGPU::V_LSHRREV_B16_e64_vi: + case AMDGPU::V_LSHRREV_B16_gfx10: + + case AMDGPU::V_ASHRREV_I16_e32: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_ASHRREV_I16_e32_vi: + case AMDGPU::V_ASHRREV_I16_e64_vi: + case AMDGPU::V_ASHRREV_I16_gfx10: + + case AMDGPU::V_LSHLREV_B64: + case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHLREV_B64_vi: + + case AMDGPU::V_LSHRREV_B64: + case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHRREV_B64_vi: + + case AMDGPU::V_ASHRREV_I64: + case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHRREV_I64_vi: + + case AMDGPU::V_PK_LSHLREV_B16: + case AMDGPU::V_PK_LSHLREV_B16_gfx10: + case AMDGPU::V_PK_LSHLREV_B16_vi: + + case AMDGPU::V_PK_LSHRREV_B16: + case AMDGPU::V_PK_LSHRREV_B16_gfx10: + case AMDGPU::V_PK_LSHRREV_B16_vi: + case AMDGPU::V_PK_ASHRREV_I16: + case AMDGPU::V_PK_ASHRREV_I16_gfx10: + case AMDGPU::V_PK_ASHRREV_I16_vi: + return true; + default: + return false; + } +} + +bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { + + using namespace SIInstrFlags; + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + + // lds_direct register is defined so that it can be used + // with 9-bit operands only. Ignore encodings which do not accept these. + if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int SrcIndices[] = { Src1Idx, Src2Idx }; + + // lds_direct cannot be specified as either src1 or src2. + for (int SrcIdx : SrcIndices) { + if (SrcIdx == -1) break; + const MCOperand &Src = Inst.getOperand(SrcIdx); + if (Src.isReg() && Src.getReg() == LDS_DIRECT) { + return false; + } + } + + if (Src0Idx == -1) + return true; + + const MCOperand &Src = Inst.getOperand(Src0Idx); + if (!Src.isReg() || Src.getReg() != LDS_DIRECT) + return true; + + // lds_direct is specified as src0. Check additional limitations. 
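+  // [Editor's note] Illustrative examples of what this accepts and rejects
+  // (not an exhaustive list):
+  //   v_mov_b32    v0, lds_direct        // OK: lds_direct is src0, plain VOP1
+  //   v_add_f32    v0, v1, lds_direct    // rejected above: used as src1
+  //   v_subrev_f32 v0, lds_direct, v1    // rejected below: *rev opcode
+  //   v_mov_b32_sdwa v0, lds_direct ...  // rejected below: SDWA encoding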
+ return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); +} + +SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (Op.isFlatOffset()) + return Op.getStartLoc(); + } + return getLoc(); +} + +bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, + const OperandVector &Operands) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if ((TSFlags & SIInstrFlags::FLAT) == 0) + return true; + + auto Opcode = Inst.getOpcode(); + auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); + assert(OpNum != -1); + + const auto &Op = Inst.getOperand(OpNum); + if (!hasFlatOffsets() && Op.getImm() != 0) { + Error(getFlatOffsetLoc(Operands), + "flat offset modifier is not supported on this GPU"); + return false; + } + + // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. + // For FLAT segment the offset must be positive; + // MSB is ignored and forced to zero. + unsigned OffsetSize = isGFX9() ? 13 : 12; + if (TSFlags & SIInstrFlags::IsNonFlatSeg) { + if (!isIntN(OffsetSize, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + isGFX9() ? "expected a 13-bit signed offset" : + "expected a 12-bit signed offset"); + return false; + } + } else { + if (!isUIntN(OffsetSize - 1, Op.getImm())) { + Error(getFlatOffsetLoc(Operands), + isGFX9() ? "expected a 12-bit unsigned offset" : + "expected an 11-bit unsigned offset"); + return false; + } + } + + return true; +} + +bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { + unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + + const int OpIndices[] = { Src0Idx, Src1Idx }; + + unsigned NumLiterals = 0; + uint32_t LiteralValue; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (MO.isImm() && + // Exclude special imm operands (like that used by s_set_gpr_idx_on) + AMDGPU::isSISrcOperand(Desc, OpIdx) && + !isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast<uint32_t>(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } + } + + return NumLiterals <= 1; +} + +bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || + Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + if (OpSel & ~3) + return false; + } + return true; +} + +// Check if VCC register matches wavefront size +bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { + auto FB = getFeatureBits(); + return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || + (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); +} + +// VOP3 literal is only allowed in GFX10+ and only one can be used +bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { + unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) + return true; + + const int Src0Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + unsigned NumLiterals = 0; + uint32_t LiteralValue; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) + continue; + + if (!isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast<uint32_t>(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } + } + + return !NumLiterals || + (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); +} + +bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, + const SMLoc &IDLoc, + const OperandVector &Operands) { + if (!validateLdsDirect(Inst)) { + Error(IDLoc, + "invalid use of lds_direct"); + return false; + } + if (!validateSOPLiteral(Inst)) { + Error(IDLoc, + "only one literal operand is allowed"); + return false; + } + if (!validateVOP3Literal(Inst)) { + Error(IDLoc, + "invalid literal operand"); + return false; + } + if (!validateConstantBusLimitations(Inst)) { + Error(IDLoc, + "invalid operand (violates constant bus restrictions)"); + return false; + } + if (!validateEarlyClobberLimitations(Inst)) { + Error(IDLoc, + "destination must be different than all sources"); + return false; + } + if (!validateIntClampSupported(Inst)) { + Error(IDLoc, + "integer clamping is not supported on this GPU"); + return false; + } + if (!validateOpSel(Inst)) { + Error(IDLoc, + "invalid op_sel operand"); + return false; + } + // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. + if (!validateMIMGD16(Inst)) { + Error(IDLoc, + "d16 modifier is not supported on this GPU"); + return false; + } + if (!validateMIMGDim(Inst)) { + Error(IDLoc, "dim modifier is required on this GPU"); + return false; + } + if (!validateMIMGDataSize(Inst)) { + Error(IDLoc, + "image data size does not match dmask and tfe"); + return false; + } + if (!validateMIMGAddrSize(Inst)) { + Error(IDLoc, + "image address size does not match dim and a16"); + return false; + } + if (!validateMIMGAtomicDMask(Inst)) { + Error(IDLoc, + "invalid atomic image dmask"); + return false; + } + if (!validateMIMGGatherDMask(Inst)) { + Error(IDLoc, + "invalid image_gather dmask: only one bit must be set"); + return false; + } + if (!validateFlatOffset(Inst, Operands)) { + return false; + } + + return true; +} + +static std::string AMDGPUMnemonicSpellCheck(StringRef S, + const FeatureBitset &FBS, + unsigned VariantID = 0); + +bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned Result = Match_Success; + for (auto Variant : getMatchedVariants()) { + uint64_t EI; + auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, + Variant); + // We order match statuses from least to most specific. 
We use most specific + // status as resulting + // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 + if ((R == Match_Success) || + (R == Match_PreferE32) || + (R == Match_MissingFeature && Result != Match_PreferE32) || + (R == Match_InvalidOperand && Result != Match_MissingFeature + && Result != Match_PreferE32) || + (R == Match_MnemonicFail && Result != Match_InvalidOperand + && Result != Match_MissingFeature + && Result != Match_PreferE32)) { + Result = R; + ErrorInfo = EI; + } + if (R == Match_Success) + break; + } + + switch (Result) { + default: break; + case Match_Success: + if (!validateInstruction(Inst, IDLoc, Operands)) { + return true; + } + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, getSTI()); + return false; + + case Match_MissingFeature: + return Error(IDLoc, "instruction not supported on this GPU"); + + case Match_MnemonicFail: { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = AMDGPUMnemonicSpellCheck( + ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((AMDGPUOperand &)*Operands[0]).getLocRange()); + } + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) { + return Error(IDLoc, "too few operands for instruction"); + } + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + + case Match_PreferE32: + return Error(IDLoc, "internal error: instruction without _e64 suffix " + "should be encoded as e32"); + } + llvm_unreachable("Implement any new match types added!"); +} + +bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { + int64_t Tmp = -1; + if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { + return true; + } + if (getParser().parseAbsoluteExpression(Tmp)) { + return true; + } + Ret = static_cast<uint32_t>(Tmp); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, + uint32_t &Minor) { + if (ParseAsAbsoluteExpression(Major)) + return TokError("invalid major version"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("minor version number required, comma expected"); + Lex(); + + if (ParseAsAbsoluteExpression(Minor)) + return TokError("invalid minor version"); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { + if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) + return TokError("directive only supported for amdgcn architecture"); + + std::string Target; + + SMLoc TargetStart = getTok().getLoc(); + if (getParser().parseEscapedString(Target)) + return true; + SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); + + std::string ExpectedTarget; + raw_string_ostream ExpectedTargetOS(ExpectedTarget); + IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); + + if (Target != ExpectedTargetOS.str()) + return getParser().Error(TargetRange.Start, "target must match options", + TargetRange); + + getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); + return false; +} + +bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { + return getParser().Error(Range.Start, "value out of range", Range); +} + +bool AMDGPUAsmParser::calculateGPRBlocks( + const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, + bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, + SMRange VGPRRange, 
unsigned NextFreeSGPR, SMRange SGPRRange, + unsigned &VGPRBlocks, unsigned &SGPRBlocks) { + // TODO(scott.linder): These calculations are duplicated from + // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. + IsaVersion Version = getIsaVersion(getSTI().getCPU()); + + unsigned NumVGPRs = NextFreeVGPR; + unsigned NumSGPRs = NextFreeSGPR; + + if (Version.Major >= 10) + NumSGPRs = 0; + else { + unsigned MaxAddressableNumSGPRs = + IsaInfo::getAddressableNumSGPRs(&getSTI()); + + if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); + + NumSGPRs += + IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); + + if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && + NumSGPRs > MaxAddressableNumSGPRs) + return OutOfRangeError(SGPRRange); + + if (Features.test(FeatureSGPRInitBug)) + NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + } + + VGPRBlocks = + IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); + SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { + if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) + return TokError("directive only supported for amdgcn architecture"); + + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) + return TokError("directive only supported for amdhsa OS"); + + StringRef KernelName; + if (getParser().parseIdentifier(KernelName)) + return true; + + kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); + + StringSet<> Seen; + + IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); + + SMRange VGPRRange; + uint64_t NextFreeVGPR = 0; + SMRange SGPRRange; + uint64_t NextFreeSGPR = 0; + unsigned UserSGPRCount = 0; + bool ReserveVCC = true; + bool ReserveFlatScr = true; + bool ReserveXNACK = hasXNACK(); + Optional<bool> EnableWavefrontSize32; + + while (true) { + while (getLexer().is(AsmToken::EndOfStatement)) + Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); + + StringRef ID = getTok().getIdentifier(); + SMRange IDRange = getTok().getLocRange(); + Lex(); + + if (ID == ".end_amdhsa_kernel") + break; + + if (Seen.find(ID) != Seen.end()) + return TokError(".amdhsa_ directives cannot be repeated"); + Seen.insert(ID); + + SMLoc ValStart = getTok().getLoc(); + int64_t IVal; + if (getParser().parseAbsoluteExpression(IVal)) + return true; + SMLoc ValEnd = getTok().getLoc(); + SMRange ValRange = SMRange(ValStart, ValEnd); + + if (IVal < 0) + return OutOfRangeError(ValRange); + + uint64_t Val = IVal; + +#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ + if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ + return OutOfRangeError(RANGE); \ + AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); + + if (ID == ".amdhsa_group_segment_fixed_size") { + if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) + return OutOfRangeError(ValRange); + KD.group_segment_fixed_size = Val; + } else if (ID == ".amdhsa_private_segment_fixed_size") { + if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) + return OutOfRangeError(ValRange); + KD.private_segment_fixed_size = Val; + } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, + Val, ValRange); + UserSGPRCount += 4; + } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { + 
PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, + ValRange); + UserSGPRCount += 2; + } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, + ValRange); + UserSGPRCount += 2; + } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, + Val, ValRange); + UserSGPRCount += 2; + } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, + ValRange); + UserSGPRCount += 2; + } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, + ValRange); + UserSGPRCount += 2; + } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, + Val, ValRange); + UserSGPRCount += 1; + } else if (ID == ".amdhsa_wavefront_size32") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + EnableWavefrontSize32 = Val; + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + Val, ValRange); + } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, + ValRange); + } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, + ValRange); + } else if (ID == ".amdhsa_system_vgpr_workitem_id") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, + ValRange); + } else if (ID == ".amdhsa_next_free_vgpr") { + VGPRRange = ValRange; + NextFreeVGPR = Val; + } else if (ID == ".amdhsa_next_free_sgpr") { + SGPRRange = ValRange; + NextFreeSGPR = Val; + } else if (ID == ".amdhsa_reserve_vcc") { + if (!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveVCC = Val; + } else if (ID == ".amdhsa_reserve_flat_scratch") { + if (IVersion.Major < 7) + return getParser().Error(IDRange.Start, "directive requires gfx7+", + IDRange); + if (!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveFlatScr = Val; + } else if (ID == ".amdhsa_reserve_xnack_mask") { + if (IVersion.Major < 8) + return getParser().Error(IDRange.Start, "directive requires gfx8+", + IDRange); + if (!isUInt<1>(Val)) + return OutOfRangeError(ValRange); + ReserveXNACK = Val; + } else if (ID == ".amdhsa_float_round_mode_32") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); + } else if (ID == ".amdhsa_float_round_mode_16_64") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); + } else 
if (ID == ".amdhsa_float_denorm_mode_32") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); + } else if (ID == ".amdhsa_float_denorm_mode_16_64") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, + ValRange); + } else if (ID == ".amdhsa_dx10_clamp") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); + } else if (ID == ".amdhsa_ieee_mode") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, + Val, ValRange); + } else if (ID == ".amdhsa_fp16_overflow") { + if (IVersion.Major < 9) + return getParser().Error(IDRange.Start, "directive requires gfx9+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, + ValRange); + } else if (ID == ".amdhsa_workgroup_processor_mode") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, + ValRange); + } else if (ID == ".amdhsa_memory_ordered") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, + ValRange); + } else if (ID == ".amdhsa_forward_progress") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, + ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, + ValRange); + } else if (ID == ".amdhsa_exception_fp_denorm_src") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { + PARSE_BITS_ENTRY( + KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, + ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, + Val, ValRange); + } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, + Val, ValRange); + } else if (ID == ".amdhsa_exception_int_div_zero") { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, + Val, ValRange); + } else { + return getParser().Error(IDRange.Start, + "unknown .amdhsa_kernel directive", IDRange); + } + +#undef PARSE_BITS_ENTRY + } + + if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) + return TokError(".amdhsa_next_free_vgpr directive is required"); + + if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) + return TokError(".amdhsa_next_free_sgpr directive is required"); + + unsigned VGPRBlocks; + unsigned SGPRBlocks; + if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, + ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, + VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, + SGPRBlocks)) + return true; + + if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( + VGPRBlocks)) + 
return OutOfRangeError(VGPRRange); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); + + if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( + SGPRBlocks)) + return OutOfRangeError(SGPRRange); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, + SGPRBlocks); + + if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) + return TokError("too many user SGPRs enabled"); + AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, + UserSGPRCount); + + getTargetStreamer().EmitAmdhsaKernelDescriptor( + getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, + ReserveFlatScr, ReserveXNACK); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { + uint32_t Major; + uint32_t Minor; + + if (ParseDirectiveMajorMinor(Major, Minor)) + return true; + + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { + uint32_t Major; + uint32_t Minor; + uint32_t Stepping; + StringRef VendorName; + StringRef ArchName; + + // If this directive has no arguments, then use the ISA version for the + // targeted GPU. + if (getLexer().is(AsmToken::EndOfStatement)) { + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, + ISA.Stepping, + "AMD", "AMDGPU"); + return false; + } + + if (ParseDirectiveMajorMinor(Major, Minor)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("stepping version number required, comma expected"); + Lex(); + + if (ParseAsAbsoluteExpression(Stepping)) + return TokError("invalid stepping version"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("vendor name required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("invalid vendor name"); + + VendorName = getLexer().getTok().getStringContents(); + Lex(); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("arch name required, comma expected"); + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("invalid arch name"); + + ArchName = getLexer().getTok().getStringContents(); + Lex(); + + getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, + VendorName, ArchName); + return false; +} + +bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, + amd_kernel_code_t &Header) { + // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing + // assembly for backwards compatibility. 
+ if (ID == "max_scratch_backing_memory_byte_size") { + Parser.eatToEndOfStatement(); + return false; + } + + SmallString<40> ErrStr; + raw_svector_ostream Err(ErrStr); + if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { + return TokError(Err.str()); + } + Lex(); + + if (ID == "enable_wavefront_size32") { + if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { + if (!isGFX10()) + return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); + } else { + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); + } + } + + if (ID == "wavefront_size") { + if (Header.wavefront_size == 5) { + if (!isGFX10()) + return TokError("wavefront_size=5 is only allowed on GFX10+"); + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + return TokError("wavefront_size=5 requires +WavefrontSize32"); + } else if (Header.wavefront_size == 6) { + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + return TokError("wavefront_size=6 requires +WavefrontSize64"); + } + } + + if (ID == "enable_wgp_mode") { + if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); + } + + if (ID == "enable_mem_ordered") { + if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); + } + + if (ID == "enable_fwd_progress") { + if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); + } + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { + amd_kernel_code_t Header; + AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); + + while (true) { + // Lex EndOfStatement. This is in a while loop, because lexing a comment + // will set the current token to EndOfStatement. 
+ while(getLexer().is(AsmToken::EndOfStatement)) + Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected value identifier or .end_amd_kernel_code_t"); + + StringRef ID = getLexer().getTok().getIdentifier(); + Lex(); + + if (ID == ".end_amd_kernel_code_t") + break; + + if (ParseAMDKernelCodeTValue(ID, Header)) + return true; + } + + getTargetStreamer().EmitAMDKernelCodeT(Header); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef KernelName = Parser.getTok().getString(); + + getTargetStreamer().EmitAMDGPUSymbolType(KernelName, + ELF::STT_AMDGPU_HSA_KERNEL); + Lex(); + if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) + KernelScope.initialize(getContext()); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveISAVersion() { + if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { + return Error(getParser().getTok().getLoc(), + ".amd_amdgpu_isa directive is not available on non-amdgcn " + "architectures"); + } + + auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); + + std::string ISAVersionStringFromSTI; + raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); + IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); + + if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { + return Error(getParser().getTok().getLoc(), + ".amd_amdgpu_isa directive does not match triple and/or mcpu " + "arguments specified through the command line"); + } + + getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); + Lex(); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { + const char *AssemblerDirectiveBegin; + const char *AssemblerDirectiveEnd; + std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = + AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) + ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, + HSAMD::V3::AssemblerDirectiveEnd) + : std::make_tuple(HSAMD::AssemblerDirectiveBegin, + HSAMD::AssemblerDirectiveEnd); + + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { + return Error(getParser().getTok().getLoc(), + (Twine(AssemblerDirectiveBegin) + Twine(" directive is " + "not available on non-amdhsa OSes")).str()); + } + + std::string HSAMetadataString; + if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, + HSAMetadataString)) + return true; + + if (IsaInfo::hasCodeObjectV3(&getSTI())) { + if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } else { + if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + } + + return false; +} + +/// Common code to parse out a block of text (typically YAML) between start and +/// end directives. 
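+/// For example, for code object v2 HSA metadata this collects everything
+/// between .amd_amdgpu_hsa_metadata and .end_amd_amdgpu_hsa_metadata into
+/// CollectString.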
+bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, + const char *AssemblerDirectiveEnd, + std::string &CollectString) { + + raw_string_ostream CollectStream(CollectString); + + getLexer().setSkipSpace(false); + + bool FoundEnd = false; + while (!getLexer().is(AsmToken::Eof)) { + while (getLexer().is(AsmToken::Space)) { + CollectStream << getLexer().getTok().getString(); + Lex(); + } + + if (getLexer().is(AsmToken::Identifier)) { + StringRef ID = getLexer().getTok().getIdentifier(); + if (ID == AssemblerDirectiveEnd) { + Lex(); + FoundEnd = true; + break; + } + } + + CollectStream << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); + + Parser.eatToEndOfStatement(); + } + + getLexer().setSkipSpace(true); + + if (getLexer().is(AsmToken::Eof) && !FoundEnd) { + return TokError(Twine("expected directive ") + + Twine(AssemblerDirectiveEnd) + Twine(" not found")); + } + + CollectStream.flush(); + return false; +} + +/// Parse the assembler directive for new MsgPack-format PAL metadata. +bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { + std::string String; + if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, + AMDGPU::PALMD::AssemblerDirectiveEnd, String)) + return true; + + auto PALMetadata = getTargetStreamer().getPALMetadata(); + if (!PALMetadata->setFromString(String)) + return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); + return false; +} + +/// Parse the assembler directive for old linear-format PAL metadata. +bool AMDGPUAsmParser::ParseDirectivePALMetadata() { + if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { + return Error(getParser().getTok().getLoc(), + (Twine(PALMD::AssemblerDirective) + Twine(" directive is " + "not available on non-amdpal OSes")).str()); + } + + auto PALMetadata = getTargetStreamer().getPALMetadata(); + PALMetadata->setLegacy(); + for (;;) { + uint32_t Key, Value; + if (ParseAsAbsoluteExpression(Key)) { + return TokError(Twine("invalid value in ") + + Twine(PALMD::AssemblerDirective)); + } + if (getLexer().isNot(AsmToken::Comma)) { + return TokError(Twine("expected an even number of values in ") + + Twine(PALMD::AssemblerDirective)); + } + Lex(); + if (ParseAsAbsoluteExpression(Value)) { + return TokError(Twine("invalid value in ") + + Twine(PALMD::AssemblerDirective)); + } + PALMetadata->setRegister(Key, Value); + if (getLexer().isNot(AsmToken::Comma)) + break; + Lex(); + } + return false; +} + +/// ParseDirectiveAMDGPULDS +/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] +bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { + if (getParser().checkForValidSection()) + return true; + + StringRef Name; + SMLoc NameLoc = getLexer().getLoc(); + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); + if (parseToken(AsmToken::Comma, "expected ','")) + return true; + + unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Size)) + return true; + if (Size < 0) + return Error(SizeLoc, "size must be non-negative"); + if (Size > LocalMemorySize) + return Error(SizeLoc, "size is too large"); + + int64_t Align = 4; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + SMLoc AlignLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Align)) + return true; + if (Align < 0 || !isPowerOf2_64(Align)) + return 
Error(AlignLoc, "alignment must be a power of two"); + + // Alignment larger than the size of LDS is possible in theory, as long + // as the linker manages to place to symbol at address 0, but we do want + // to make sure the alignment fits nicely into a 32-bit integer. + if (Align >= 1u << 31) + return Error(AlignLoc, "alignment is too large"); + } + + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.amdgpu_lds' directive")) + return true; + + Symbol->redefineIfPossible(); + if (!Symbol->isUndefined()) + return Error(NameLoc, "invalid symbol redefinition"); + + getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); + return false; +} + +bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getString(); + + if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { + if (IDVal == ".amdgcn_target") + return ParseDirectiveAMDGCNTarget(); + + if (IDVal == ".amdhsa_kernel") + return ParseDirectiveAMDHSAKernel(); + + // TODO: Restructure/combine with PAL metadata directive. + if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); + } else { + if (IDVal == ".hsa_code_object_version") + return ParseDirectiveHSACodeObjectVersion(); + + if (IDVal == ".hsa_code_object_isa") + return ParseDirectiveHSACodeObjectISA(); + + if (IDVal == ".amd_kernel_code_t") + return ParseDirectiveAMDKernelCodeT(); + + if (IDVal == ".amdgpu_hsa_kernel") + return ParseDirectiveAMDGPUHsaKernel(); + + if (IDVal == ".amd_amdgpu_isa") + return ParseDirectiveISAVersion(); + + if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); + } + + if (IDVal == ".amdgpu_lds") + return ParseDirectiveAMDGPULDS(); + + if (IDVal == PALMD::AssemblerDirectiveBegin) + return ParseDirectivePALMetadataBegin(); + + if (IDVal == PALMD::AssemblerDirective) + return ParseDirectivePALMetadata(); + + return true; +} + +bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, + unsigned RegNo) const { + + for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return isGFX9() || isGFX10(); + } + + // GFX10 has 2 more SGPRs 104 and 105. + for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return hasSGPR104_SGPR105(); + } + + switch (RegNo) { + case AMDGPU::SRC_SHARED_BASE: + case AMDGPU::SRC_SHARED_LIMIT: + case AMDGPU::SRC_PRIVATE_BASE: + case AMDGPU::SRC_PRIVATE_LIMIT: + case AMDGPU::SRC_POPS_EXITING_WAVE_ID: + return !isCI() && !isSI() && !isVI(); + case AMDGPU::TBA: + case AMDGPU::TBA_LO: + case AMDGPU::TBA_HI: + case AMDGPU::TMA: + case AMDGPU::TMA_LO: + case AMDGPU::TMA_HI: + return !isGFX9() && !isGFX10(); + case AMDGPU::XNACK_MASK: + case AMDGPU::XNACK_MASK_LO: + case AMDGPU::XNACK_MASK_HI: + return !isCI() && !isSI() && !isGFX10() && hasXNACK(); + case AMDGPU::SGPR_NULL: + return isGFX10(); + default: + break; + } + + if (isCI()) + return true; + + if (isSI() || isGFX10()) { + // No flat_scr on SI. + // On GFX10 flat scratch is not a valid register operand and can only be + // accessed with s_setreg/s_getreg. + switch (RegNo) { + case AMDGPU::FLAT_SCR: + case AMDGPU::FLAT_SCR_LO: + case AMDGPU::FLAT_SCR_HI: + return false; + default: + return true; + } + } + + // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that + // SI/CI have. 
+ for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return hasSGPR102_SGPR103(); + } + + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, + OperandMode Mode) { + // Try to parse with a custom parser + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // If we successfully parsed the operand or if there as an error parsing, + // we are done. + // + // If we are parsing after we reach EndOfStatement then this means we + // are appending default values to the Operands list. This is only done + // by custom parser, so we shouldn't continue on to the generic parsing. + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + getLexer().is(AsmToken::EndOfStatement)) + return ResTy; + + if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { + unsigned Prefix = Operands.size(); + SMLoc LBraceLoc = getTok().getLoc(); + Parser.Lex(); // eat the '[' + + for (;;) { + ResTy = parseReg(Operands); + if (ResTy != MatchOperand_Success) + return ResTy; + + if (getLexer().is(AsmToken::RBrac)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + } + + if (Operands.size() - Prefix > 1) { + Operands.insert(Operands.begin() + Prefix, + AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); + Operands.push_back(AMDGPUOperand::CreateToken(this, "]", + getTok().getLoc())); + } + + Parser.Lex(); // eat the ']' + return MatchOperand_Success; + } + + return parseRegOrImm(Operands); +} + +StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { + // Clear any forced encodings from the previous instruction. + setForcedEncodingSize(0); + setForcedDPP(false); + setForcedSDWA(false); + + if (Name.endswith("_e64")) { + setForcedEncodingSize(64); + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_e32")) { + setForcedEncodingSize(32); + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_dpp")) { + setForcedDPP(true); + return Name.substr(0, Name.size() - 4); + } else if (Name.endswith("_sdwa")) { + setForcedSDWA(true); + return Name.substr(0, Name.size() - 5); + } + return Name; +} + +bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + // Add the instruction mnemonic + Name = parseMnemonicSuffix(Name); + Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); + + bool IsMIMG = Name.startswith("image_"); + + while (!getLexer().is(AsmToken::EndOfStatement)) { + OperandMode Mode = OperandMode_Default; + if (IsMIMG && isGFX10() && Operands.size() == 2) + Mode = OperandMode_NSA; + OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); + + // Eat the comma or space if there is one. + if (getLexer().is(AsmToken::Comma)) + Parser.Lex(); + + switch (Res) { + case MatchOperand_Success: break; + case MatchOperand_ParseFail: + // FIXME: use real operand location rather than the current location. + Error(getLexer().getLoc(), "failed parsing operand."); + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; + case MatchOperand_NoMatch: + // FIXME: use real operand location rather than the current location. 
+ Error(getLexer().getLoc(), "not a valid operand."); + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { + + if (!trySkipId(Prefix, AsmToken::Colon)) + return MatchOperand_NoMatch; + + return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy, + bool (*ConvertResult)(int64_t&)) { + SMLoc S = getLoc(); + int64_t Value = 0; + + OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); + if (Res != MatchOperand_Success) + return Res; + + if (ConvertResult && !ConvertResult(Value)) { + Error(S, "invalid " + StringRef(Prefix) + " value."); + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy, + bool (*ConvertResult)(int64_t&)) { + SMLoc S = getLoc(); + if (!trySkipId(Prefix, AsmToken::Colon)) + return MatchOperand_NoMatch; + + if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) + return MatchOperand_ParseFail; + + unsigned Val = 0; + const unsigned MaxSize = 4; + + // FIXME: How to verify the number of elements matches the number of src + // operands? + for (int I = 0; ; ++I) { + int64_t Op; + SMLoc Loc = getLoc(); + if (!parseExpr(Op)) + return MatchOperand_ParseFail; + + if (Op != 0 && Op != 1) { + Error(Loc, "invalid " + StringRef(Prefix) + " value."); + return MatchOperand_ParseFail; + } + + Val |= (Op << I); + + if (trySkipToken(AsmToken::RBrac)) + break; + + if (I + 1 == MaxSize) { + Error(getLoc(), "expected a closing square bracket"); + return MatchOperand_ParseFail; + } + + if (!skipToken(AsmToken::Comma, "expected a comma")) + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy) { + int64_t Bit = 0; + SMLoc S = Parser.getTok().getLoc(); + + // We are at the end of the statement, and this is a default argument, so + // use a default value. 
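+  // A named bit can be written either as the bare name to set it, e.g. "gds",
+  // or with a "no" prefix to clear it, e.g. "nogds".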
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + switch(getLexer().getKind()) { + case AsmToken::Identifier: { + StringRef Tok = Parser.getTok().getString(); + if (Tok == Name) { + if (Tok == "r128" && isGFX9()) + Error(S, "r128 modifier is not supported on this GPU"); + if (Tok == "a16" && !isGFX9() && !isGFX10()) + Error(S, "a16 modifier is not supported on this GPU"); + Bit = 1; + Parser.Lex(); + } else if (Tok.startswith("no") && Tok.endswith(Name)) { + Bit = 0; + Parser.Lex(); + } else { + return MatchOperand_NoMatch; + } + break; + } + default: + return MatchOperand_NoMatch; + } + } + + if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) + return MatchOperand_ParseFail; + + Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); + return MatchOperand_Success; +} + +static void addOptionalImmOperand( + MCInst& Inst, const OperandVector& Operands, + AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, + AMDGPUOperand::ImmTy ImmT, + int64_t Default = 0) { + auto i = OptionalIdx.find(ImmT); + if (i != OptionalIdx.end()) { + unsigned Idx = i->second; + ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); + } else { + Inst.addOperand(MCOperand::createImm(Default)); + } +} + +OperandMatchResultTy +AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + StringRef Tok = Parser.getTok().getString(); + if (Tok != Prefix) { + return MatchOperand_NoMatch; + } + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) { + return MatchOperand_ParseFail; + } + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Identifier)) { + return MatchOperand_ParseFail; + } + + Value = Parser.getTok().getString(); + return MatchOperand_Success; +} + +// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their +// values to live in a joint format operand in the MCInst encoding. +OperandMatchResultTy +AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Dfmt = 0, Nfmt = 0; + // dfmt and nfmt can appear in either order, and each is optional. 
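+  // For example, "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are equivalent and both
+  // produce the combined value Dfmt | (Nfmt << 4) computed below.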
+ bool GotDfmt = false, GotNfmt = false; + while (!GotDfmt || !GotNfmt) { + if (!GotDfmt) { + auto Res = parseIntWithPrefix("dfmt", Dfmt); + if (Res != MatchOperand_NoMatch) { + if (Res != MatchOperand_Success) + return Res; + if (Dfmt >= 16) { + Error(Parser.getTok().getLoc(), "out of range dfmt"); + return MatchOperand_ParseFail; + } + GotDfmt = true; + Parser.Lex(); + continue; + } + } + if (!GotNfmt) { + auto Res = parseIntWithPrefix("nfmt", Nfmt); + if (Res != MatchOperand_NoMatch) { + if (Res != MatchOperand_Success) + return Res; + if (Nfmt >= 8) { + Error(Parser.getTok().getLoc(), "out of range nfmt"); + return MatchOperand_ParseFail; + } + GotNfmt = true; + Parser.Lex(); + continue; + } + } + break; + } + if (!GotDfmt && !GotNfmt) + return MatchOperand_NoMatch; + auto Format = Dfmt | Nfmt << 4; + Operands.push_back( + AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); + return MatchOperand_Success; +} + +//===----------------------------------------------------------------------===// +// ds +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, + const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); + + Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 +} + +void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded) { + OptionalImmIndexMap OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + if (Op.isToken() && Op.getToken() == "gds") { + IsGdsHardcoded = true; + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + AMDGPUOperand::ImmTy OffsetType = + (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : + AMDGPUOperand::ImmTyOffset; + + addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); + + if (!IsGdsHardcoded) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); + } + Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 +} + +void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + unsigned OperandIdx[4]; + unsigned EnMask = 0; + int SrcIdx = 0; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); + Op.addRegOperands(Inst, 1); + ++SrcIdx; + continue; + } + + if (Op.isOff()) { + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); + Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); + ++SrcIdx; + continue; + } + + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { + Op.addImmOperands(Inst, 1); + continue; + } + + if (Op.isToken() && Op.getToken() == "done") + continue; + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + assert(SrcIdx == 4); + + bool Compr = false; + if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { + Compr = true; + Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); + Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); + Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); + } + + for (auto i = 0; i < SrcIdx; ++i) { + if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { + EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); + + Inst.addOperand(MCOperand::createImm(EnMask)); +} + +//===----------------------------------------------------------------------===// +// s_waitcnt +//===----------------------------------------------------------------------===// + +static bool +encodeCnt( + const AMDGPU::IsaVersion ISA, + int64_t &IntVal, + int64_t CntVal, + bool Saturate, + unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaVersion &Version, unsigned)) +{ + bool Failed = false; + + IntVal = encode(ISA, IntVal, CntVal); + if (CntVal != decode(ISA, IntVal)) { + if (Saturate) { + IntVal = encode(ISA, IntVal, -1); + } else { + Failed = true; + } + } + return Failed; +} + +bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { + + SMLoc CntLoc = getLoc(); + StringRef CntName = getTokenStr(); + + if (!skipToken(AsmToken::Identifier, "expected a counter name") || + !skipToken(AsmToken::LParen, "expected a left parenthesis")) + return false; + + int64_t CntVal; + SMLoc ValLoc = getLoc(); + if (!parseExpr(CntVal)) + return false; + + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + + bool Failed = true; + bool Sat = CntName.endswith("_sat"); + + if (CntName == "vmcnt" || CntName == "vmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); + } else if (CntName == "expcnt" || CntName == "expcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); + } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); + } else { + Error(CntLoc, "invalid counter name " + CntName); + return false; + } + + if (Failed) { + 
Error(ValLoc, "too large value for " + CntName); + return false; + } + + if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) + return false; + + if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { + if (isToken(AsmToken::EndOfStatement)) { + Error(getLoc(), "expected a counter name"); + return false; + } + } + + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); + int64_t Waitcnt = getWaitcntBitMask(ISA); + SMLoc S = getLoc(); + + // If parse failed, do not return error code + // to avoid excessive error messages. + if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { + while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); + } else { + parseExpr(Waitcnt); + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); + return MatchOperand_Success; +} + +bool +AMDGPUOperand::isSWaitCnt() const { + return isImm(); +} + +//===----------------------------------------------------------------------===// +// hwreg +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, + int64_t &Offset, + int64_t &Width) { + using namespace llvm::AMDGPU::Hwreg; + + // The register may be specified by name or using a numeric code + if (isToken(AsmToken::Identifier) && + (HwReg.Id = getHwregId(getTokenStr())) >= 0) { + HwReg.IsSymbolic = true; + lex(); // skip message name + } else if (!parseExpr(HwReg.Id)) { + return false; + } + + if (trySkipToken(AsmToken::RParen)) + return true; + + // parse optional params + return + skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && + parseExpr(Offset) && + skipToken(AsmToken::Comma, "expected a comma") && + parseExpr(Width) && + skipToken(AsmToken::RParen, "expected a closing parenthesis"); +} + +bool +AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, + const int64_t Offset, + const int64_t Width, + const SMLoc Loc) { + + using namespace llvm::AMDGPU::Hwreg; + + if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { + Error(Loc, "specified hardware register is not supported on this GPU"); + return false; + } else if (!isValidHwreg(HwReg.Id)) { + Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); + return false; + } else if (!isValidHwregOffset(Offset)) { + Error(Loc, "invalid bit offset: only 5-bit values are legal"); + return false; + } else if (!isValidHwregWidth(Width)) { + Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); + return false; + } + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { + using namespace llvm::AMDGPU::Hwreg; + + int64_t ImmVal = 0; + SMLoc Loc = getLoc(); + + // If parse failed, do not return error code + // to avoid excessive error messages. 
+ if (trySkipId("hwreg", AsmToken::LParen)) { + OperandInfoTy HwReg(ID_UNKNOWN_); + int64_t Offset = OFFSET_DEFAULT_; + int64_t Width = WIDTH_DEFAULT_; + if (parseHwregBody(HwReg, Offset, Width) && + validateHwreg(HwReg, Offset, Width, Loc)) { + ImmVal = encodeHwreg(HwReg.Id, Offset, Width); + } + } else if (parseExpr(ImmVal)) { + if (ImmVal < 0 || !isUInt<16>(ImmVal)) + Error(Loc, "invalid immediate: only 16-bit values are legal"); + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isHwreg() const { + return isImmTy(ImmTyHwreg); +} + +//===----------------------------------------------------------------------===// +// sendmsg +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, + OperandInfoTy &Op, + OperandInfoTy &Stream) { + using namespace llvm::AMDGPU::SendMsg; + + if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { + Msg.IsSymbolic = true; + lex(); // skip message name + } else if (!parseExpr(Msg.Id)) { + return false; + } + + if (trySkipToken(AsmToken::Comma)) { + Op.IsDefined = true; + if (isToken(AsmToken::Identifier) && + (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { + lex(); // skip operation name + } else if (!parseExpr(Op.Id)) { + return false; + } + + if (trySkipToken(AsmToken::Comma)) { + Stream.IsDefined = true; + if (!parseExpr(Stream.Id)) + return false; + } + } + + return skipToken(AsmToken::RParen, "expected a closing parenthesis"); +} + +bool +AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, + const OperandInfoTy &Op, + const OperandInfoTy &Stream, + const SMLoc S) { + using namespace llvm::AMDGPU::SendMsg; + + // Validation strictness depends on whether message is specified + // in a symbolc or in a numeric form. In the latter case + // only encoding possibility is checked. + bool Strict = Msg.IsSymbolic; + + if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { + Error(S, "invalid message id"); + return false; + } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { + Error(S, Op.IsDefined ? + "message does not support operations" : + "missing message operation"); + return false; + } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { + Error(S, "invalid operation id"); + return false; + } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { + Error(S, "message operation does not support streams"); + return false; + } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { + Error(S, "invalid message stream id"); + return false; + } + return true; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { + using namespace llvm::AMDGPU::SendMsg; + + int64_t ImmVal = 0; + SMLoc Loc = getLoc(); + + // If parse failed, do not return error code + // to avoid excessive error messages. 
+ if (trySkipId("sendmsg", AsmToken::LParen)) { + OperandInfoTy Msg(ID_UNKNOWN_); + OperandInfoTy Op(OP_NONE_); + OperandInfoTy Stream(STREAM_ID_NONE_); + if (parseSendMsgBody(Msg, Op, Stream) && + validateSendMsg(Msg, Op, Stream, Loc)) { + ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); + } + } else if (parseExpr(ImmVal)) { + if (ImmVal < 0 || !isUInt<16>(ImmVal)) + Error(Loc, "invalid immediate: only 16-bit values are legal"); + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isSendMsg() const { + return isImmTy(ImmTySendMsg); +} + +//===----------------------------------------------------------------------===// +// v_interp +//===----------------------------------------------------------------------===// + +OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { + if (getLexer().getKind() != AsmToken::Identifier) + return MatchOperand_NoMatch; + + StringRef Str = Parser.getTok().getString(); + int Slot = StringSwitch<int>(Str) + .Case("p10", 0) + .Case("p20", 1) + .Case("p0", 2) + .Default(-1); + + SMLoc S = Parser.getTok().getLoc(); + if (Slot == -1) + return MatchOperand_ParseFail; + + Parser.Lex(); + Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, + AMDGPUOperand::ImmTyInterpSlot)); + return MatchOperand_Success; +} + +OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { + if (getLexer().getKind() != AsmToken::Identifier) + return MatchOperand_NoMatch; + + StringRef Str = Parser.getTok().getString(); + if (!Str.startswith("attr")) + return MatchOperand_NoMatch; + + StringRef Chan = Str.take_back(2); + int AttrChan = StringSwitch<int>(Chan) + .Case(".x", 0) + .Case(".y", 1) + .Case(".z", 2) + .Case(".w", 3) + .Default(-1); + if (AttrChan == -1) + return MatchOperand_ParseFail; + + Str = Str.drop_back(2).drop_front(4); + + uint8_t Attr; + if (Str.getAsInteger(10, Attr)) + return MatchOperand_ParseFail; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); + if (Attr > 63) { + Error(S, "out of bounds attr"); + return MatchOperand_Success; + } + + SMLoc SChan = SMLoc::getFromPointer(Chan.data()); + + Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, + AMDGPUOperand::ImmTyInterpAttr)); + Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, + AMDGPUOperand::ImmTyAttrChan)); + return MatchOperand_Success; +} + +//===----------------------------------------------------------------------===// +// exp +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::errorExpTgt() { + Error(Parser.getTok().getLoc(), "invalid exp target"); +} + +OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, + uint8_t &Val) { + if (Str == "null") { + Val = 9; + return MatchOperand_Success; + } + + if (Str.startswith("mrt")) { + Str = Str.drop_front(3); + if (Str == "z") { // == mrtz + Val = 8; + return MatchOperand_Success; + } + + if (Str.getAsInteger(10, Val)) + return MatchOperand_ParseFail; + + if (Val > 7) + errorExpTgt(); + + return MatchOperand_Success; + } + + if (Str.startswith("pos")) { + Str = Str.drop_front(3); + if (Str.getAsInteger(10, Val)) + return MatchOperand_ParseFail; + + if (Val > 4 || (Val == 4 && !isGFX10())) + errorExpTgt(); + + Val += 12; + return MatchOperand_Success; + } + + if (isGFX10() && Str == "prim") { + Val = 20; + return MatchOperand_Success; + } + + if (Str.startswith("param")) { + Str = Str.drop_front(5); + if 
(Str.getAsInteger(10, Val)) + return MatchOperand_ParseFail; + + if (Val >= 32) + errorExpTgt(); + + Val += 32; + return MatchOperand_Success; + } + + if (Str.startswith("invalid_target_")) { + Str = Str.drop_front(15); + if (Str.getAsInteger(10, Val)) + return MatchOperand_ParseFail; + + errorExpTgt(); + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { + uint8_t Val; + StringRef Str = Parser.getTok().getString(); + + auto Res = parseExpTgtImpl(Str, Val); + if (Res != MatchOperand_Success) + return Res; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); + + Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, + AMDGPUOperand::ImmTyExpTgt)); + return MatchOperand_Success; +} + +//===----------------------------------------------------------------------===// +// parser helpers +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { + return Token.is(AsmToken::Identifier) && Token.getString() == Id; +} + +bool +AMDGPUAsmParser::isId(const StringRef Id) const { + return isId(getToken(), Id); +} + +bool +AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { + return getTokenKind() == Kind; +} + +bool +AMDGPUAsmParser::trySkipId(const StringRef Id) { + if (isId(Id)) { + lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { + if (isId(Id) && peekToken().is(Kind)) { + lex(); + lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { + if (isToken(Kind)) { + lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, + const StringRef ErrMsg) { + if (!trySkipToken(Kind)) { + Error(getLoc(), ErrMsg); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseExpr(int64_t &Imm) { + return !getParser().parseAbsoluteExpression(Imm); +} + +bool +AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { + if (isToken(AsmToken::String)) { + Val = getToken().getStringContents(); + lex(); + return true; + } else { + Error(getLoc(), ErrMsg); + return false; + } +} + +AsmToken +AMDGPUAsmParser::getToken() const { + return Parser.getTok(); +} + +AsmToken +AMDGPUAsmParser::peekToken() { + return getLexer().peekTok(); +} + +void +AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { + auto TokCount = getLexer().peekTokens(Tokens); + + for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) + Tokens[Idx] = AsmToken(AsmToken::Error, ""); +} + +AsmToken::TokenKind +AMDGPUAsmParser::getTokenKind() const { + return getLexer().getKind(); +} + +SMLoc +AMDGPUAsmParser::getLoc() const { + return getToken().getLoc(); +} + +StringRef +AMDGPUAsmParser::getTokenStr() const { + return getToken().getString(); +} + +void +AMDGPUAsmParser::lex() { + Parser.Lex(); +} + +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +LLVM_READNONE +static unsigned +encodeBitmaskPerm(const unsigned AndMask, + const unsigned OrMask, + const unsigned XorMask) { + using namespace llvm::AMDGPU::Swizzle; + + return BITMASK_PERM_ENC | + (AndMask << BITMASK_AND_SHIFT) | + (OrMask << BITMASK_OR_SHIFT) | + (XorMask << BITMASK_XOR_SHIFT); +} + +bool 
+AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg) { + for (unsigned i = 0; i < OpNum; ++i) { + if (!skipToken(AsmToken::Comma, "expected a comma")){ + return false; + } + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (!parseExpr(Op[i])) { + return false; + } + if (Op[i] < MinVal || Op[i] > MaxVal) { + Error(ExprLoc, ErrMsg); + return false; + } + } + + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + int64_t Lane[LANE_NUM]; + if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, + "expected a 2-bit lane id")) { + Imm = QUAD_PERM_ENC; + for (unsigned I = 0; I < LANE_NUM; ++I) { + Imm |= Lane[I] << (LANE_SHIFT * I); + } + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + int64_t LaneIdx; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, + "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + if (parseSwizzleOperands(1, &LaneIdx, + 0, GroupSize - 1, + "lane id must be in the interval [0,group size - 1]")) { + Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 1, 16, "group size must be in the interval [1,16]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (!skipToken(AsmToken::Comma, "expected a comma")) { + return false; + } + + StringRef Ctl; + SMLoc StrLoc = Parser.getTok().getLoc(); + if (!parseString(Ctl)) { + return false; + } + if (Ctl.size() != BITMASK_WIDTH) { + Error(StrLoc, "expected a 5-character mask"); + return false; + } + + unsigned AndMask = 0; + unsigned OrMask = 0; + unsigned XorMask = 0; + + for (size_t i = 0; i < Ctl.size(); ++i) { + unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); + switch(Ctl[i]) { + default: + Error(StrLoc, "invalid mask"); + return false; + case '0': + break; + case '1': + OrMask |= Mask; + break; + case 'p': + AndMask |= Mask; + break; + case 'i': + AndMask |= Mask; + XorMask |= Mask; + break; + } + } + + Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { + + SMLoc OffsetLoc = Parser.getTok().getLoc(); + + if (!parseExpr(Imm)) { + return false; + } + if (!isUInt<16>(Imm)) { + Error(OffsetLoc, "expected a 16-bit offset"); + return false; + } + return 
true; +} + +bool +AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (skipToken(AsmToken::LParen, "expected a left parentheses")) { + + SMLoc ModeLoc = Parser.getTok().getLoc(); + bool Ok = false; + + if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { + Ok = parseSwizzleQuadPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { + Ok = parseSwizzleBitmaskPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { + Ok = parseSwizzleBroadcast(Imm); + } else if (trySkipId(IdSymbolic[ID_SWAP])) { + Ok = parseSwizzleSwap(Imm); + } else if (trySkipId(IdSymbolic[ID_REVERSE])) { + Ok = parseSwizzleReverse(Imm); + } else { + Error(ModeLoc, "expected a swizzle mode"); + } + + return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); + } + + return false; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Imm = 0; + + if (trySkipId("offset")) { + + bool Ok = false; + if (skipToken(AsmToken::Colon, "expected a colon")) { + if (trySkipId("swizzle")) { + Ok = parseSwizzleMacro(Imm); + } else { + Ok = parseSwizzleOffset(Imm); + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); + + return Ok? MatchOperand_Success : MatchOperand_ParseFail; + } else { + // Swizzle "offset" operand is optional. + // If it is omitted, try parsing other optional operands. + return parseOptionalOpr(Operands); + } +} + +bool +AMDGPUOperand::isSwizzle() const { + return isImmTy(ImmTySwizzle); +} + +//===----------------------------------------------------------------------===// +// VGPR Index Mode +//===----------------------------------------------------------------------===// + +int64_t AMDGPUAsmParser::parseGPRIdxMacro() { + + using namespace llvm::AMDGPU::VGPRIndexMode; + + if (trySkipToken(AsmToken::RParen)) { + return OFF; + } + + int64_t Imm = 0; + + while (true) { + unsigned Mode = 0; + SMLoc S = Parser.getTok().getLoc(); + + for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { + if (trySkipId(IdSymbolic[ModeId])) { + Mode = 1 << ModeId; + break; + } + } + + if (Mode == 0) { + Error(S, (Imm == 0)? + "expected a VGPR index mode or a closing parenthesis" : + "expected a VGPR index mode"); + break; + } + + if (Imm & Mode) { + Error(S, "duplicate VGPR index mode"); + break; + } + Imm |= Mode; + + if (trySkipToken(AsmToken::RParen)) + break; + if (!skipToken(AsmToken::Comma, + "expected a comma or a closing parenthesis")) + break; + } + + return Imm; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { + + int64_t Imm = 0; + SMLoc S = Parser.getTok().getLoc(); + + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == "gpr_idx" && + getLexer().peekTok().is(AsmToken::LParen)) { + + Parser.Lex(); + Parser.Lex(); + + // If parse failed, trigger an error but do not return error code + // to avoid excessive error messages. 
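+    // e.g. gpr_idx(SRC0,DST) sets the SRC0 and DST index-enable bits, while an
+    // empty gpr_idx() yields the OFF encoding.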
+ Imm = parseGPRIdxMacro(); + + } else { + if (getParser().parseAbsoluteExpression(Imm)) + return MatchOperand_NoMatch; + if (Imm < 0 || !isUInt<4>(Imm)) { + Error(S, "invalid immediate: only 4-bit values are legal"); + } + } + + Operands.push_back( + AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isGPRIdxMode() const { + return isImmTy(ImmTyGprIdxMode); +} + +//===----------------------------------------------------------------------===// +// sopp branch targets +//===----------------------------------------------------------------------===// + +OperandMatchResultTy +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + switch (getLexer().getKind()) { + default: return MatchOperand_ParseFail; + case AsmToken::Integer: { + int64_t Imm; + if (getParser().parseAbsoluteExpression(Imm)) + return MatchOperand_ParseFail; + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); + return MatchOperand_Success; + } + + case AsmToken::Identifier: + Operands.push_back(AMDGPUOperand::CreateExpr(this, + MCSymbolRefExpr::create(getContext().getOrCreateSymbol( + Parser.getTok().getString()), getContext()), S)); + Parser.Lex(); + return MatchOperand_Success; + } +} + +//===----------------------------------------------------------------------===// +// Boolean holding registers +//===----------------------------------------------------------------------===// + +OperandMatchResultTy +AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { + return parseReg(Operands); +} + +//===----------------------------------------------------------------------===// +// mubuf +//===----------------------------------------------------------------------===// + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); +} + +void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, + const OperandVector &Operands, + bool IsAtomic, + bool IsAtomicReturn, + bool IsLds) { + bool IsLdsOpcode = IsLds; + bool HasLdsModifier = false; + OptionalImmIndexMap OptionalIdx; + assert(IsAtomicReturn ? IsAtomic : true); + unsigned FirstOperandIdx = 1; + + for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + // Insert a tied src for atomic return dst. + // This cannot be postponed as subsequent calls to + // addImmOperands rely on correct number of MC operands. + if (IsAtomicReturn && i == FirstOperandIdx) + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + HasLdsModifier |= Op.isLDS(); + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. 
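+    // e.g. in "buffer_load_dword v0, v1, s[0:3], 0 offen offset:4" the "offen"
+    // token only selects the instruction variant and is skipped here.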
+    if (Op.isToken()) {
+      continue;
+    }
+    assert(Op.isImm());
+
+    // Handle optional arguments
+    OptionalIdx[Op.getImmTy()] = i;
+  }
+
+  // This is a workaround for an llvm quirk which may result in an
+  // incorrect instruction selection. Lds and non-lds versions of
+  // MUBUF instructions are identical except that lds versions
+  // have a mandatory 'lds' modifier. However, this modifier follows
+  // the optional modifiers, and the llvm asm matcher regards this
+  // 'lds' modifier as an optional one. As a result, an lds version
+  // of the opcode may be selected even if the source has no 'lds'
+  // modifier.
+  if (IsLdsOpcode && !HasLdsModifier) {
+    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
+    if (NoLdsOpcode != -1) { // Got lds version - correct it.
+      Inst.setOpcode(NoLdsOpcode);
+      IsLdsOpcode = false;
+    }
+  }
+
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+  if (!IsAtomic) { // glc is hard-coded.
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
+  }
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+
+  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+  }
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
+}
+
+void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+    // Add the register arguments
+    if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle the case where soffset is an immediate
+    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      Op.addImmOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle tokens like 'offen' which are sometimes hard-coded into the
+    // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOffset); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + + if (isGFX10()) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); +} + +//===----------------------------------------------------------------------===// +// mimg +//===----------------------------------------------------------------------===// + +void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic) { + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + if (IsAtomic) { + // Add src, same as dst + assert(Desc.getNumDefs() == 1); + ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); + } + + OptionalImmIndexMap OptionalIdx; + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else if (!Op.isToken()) { + llvm_unreachable("unexpected operand type"); + } + } + + bool IsGFX10 = isGFX10(); + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); + if (IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); + if (IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); + if (!IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); +} + +void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { + cvtMIMG(Inst, Operands, true); +} + +//===----------------------------------------------------------------------===// +// smrd +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isSMRDOffset8() const { + return isImm() && isUInt<8>(getImm()); +} + +bool AMDGPUOperand::isSMRDOffset20() const { + return isImm() && isUInt<20>(getImm()); +} + +bool AMDGPUOperand::isSMRDLiteralOffset() const { + // 32-bit literals are only supported on CI and we only want to use them + // when the offset is > 8-bits. 
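+  // e.g. an offset of 0xff still fits the 8-bit form, while a value such as
+  // 0x1234 can only be encoded with the CI 32-bit literal.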
+ return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + +//===----------------------------------------------------------------------===// +// vop3 +//===----------------------------------------------------------------------===// + +static bool ConvertOmodMul(int64_t &Mul) { + if (Mul != 1 && Mul != 2 && Mul != 4) + return false; + + Mul >>= 1; + return true; +} + +static bool ConvertOmodDiv(int64_t &Div) { + if (Div == 1) { + Div = 0; + return true; + } + + if (Div == 2) { + Div = 3; + return true; + } + + return false; +} + +static bool ConvertBoundCtrl(int64_t &BoundCtrl) { + if (BoundCtrl == 0) { + BoundCtrl = 1; + return true; + } + + if (BoundCtrl == -1) { + BoundCtrl = 0; + return true; + } + + return false; +} + +// Note: the order in this table matches the order of operands in AsmString. +static const OptionalOperand AMDGPUOptionalOperandTable[] = { + {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, + {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, + {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, + {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, + {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, + {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, + {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, + {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, + {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, + {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, + {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, + {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, + {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, + {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, + {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, + {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, + {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, + {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, + {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, + {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, + {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, + {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, + {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, + {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, + {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, + {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, + {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, + {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, + {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, + {"vm", AMDGPUOperand::ImmTyExpVM, true, 
nullptr}, + {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, + {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, + {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, + {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, + {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, + {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, + {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} +}; + +OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { + unsigned size = Operands.size(); + assert(size > 0); + + OperandMatchResultTy res = parseOptionalOpr(Operands); + + // This is a hack to enable hardcoded mandatory operands which follow + // optional operands. + // + // Current design assumes that all operands after the first optional operand + // are also optional. However implementation of some instructions violates + // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). + // + // To alleviate this problem, we have to (implicitly) parse extra operands + // to make sure autogenerated parser of custom operands never hit hardcoded + // mandatory operands. + + if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { + + // We have parsed the first optional operand. + // Parse as many operands as necessary to skip all mandatory operands. + + for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { + if (res != MatchOperand_Success || + getLexer().is(AsmToken::EndOfStatement)) break; + if (getLexer().is(AsmToken::Comma)) Parser.Lex(); + res = parseOptionalOpr(Operands); + } + } + + return res; +} + +OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { + OperandMatchResultTy res; + for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { + // try to parse any optional operand here + if (Op.IsBit) { + res = parseNamedBit(Op.Name, Operands, Op.Type); + } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { + res = parseOModOperand(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || + Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || + Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { + res = parseSDWASel(Operands, Op.Name, Op.Type); + } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { + res = parseSDWADstUnused(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || + Op.Type == AMDGPUOperand::ImmTyOpSelHi || + Op.Type == AMDGPUOperand::ImmTyNegLo || + Op.Type == AMDGPUOperand::ImmTyNegHi) { + res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, + Op.ConvertResult); + } else if (Op.Type == AMDGPUOperand::ImmTyDim) { + res = parseDim(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { + res = parseDfmtNfmt(Operands); + } else { + res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); + } + if (res != MatchOperand_NoMatch) { + return res; + } + } + return MatchOperand_NoMatch; +} + +OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { + StringRef Name = Parser.getTok().getString(); + if (Name == "mul") { + return parseIntWithPrefix("mul", Operands, + AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); + } + + if (Name == "div") { + return parseIntWithPrefix("div", Operands, + AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); + } + + return MatchOperand_NoMatch; +} + +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { + cvtVOP3P(Inst, Operands); + + int Opc = Inst.getOpcode(); + + int SrcNum; + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + 
AMDGPU::OpName::src2 }; + for (SrcNum = 0; + SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; + ++SrcNum); + assert(SrcNum > 0); + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + if ((OpSel & (1 << SrcNum)) != 0) { + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); + } +} + +static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { + // 1. This operand is input modifiers + return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS + // 2. This is not last operand + && Desc.NumOperands > (OpNum + 1) + // 3. Next operand is register class + && Desc.OpInfo[OpNum + 1].RegClass != -1 + // 4. Next register is not tied to any other operand + && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; +} + +void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) +{ + OptionalImmIndexMap OptionalIdx; + unsigned Opc = Inst.getOpcode(); + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isInterpSlot() || + Op.isInterpAttr() || + Op.isAttrChan()) { + Inst.addOperand(MCOperand::createImm(Op.getImm())); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("unhandled operand type"); + } + } + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); + } + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } +} + +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + unsigned Opc = Inst.getOpcode(); + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + // This instruction has src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); + } else { + llvm_unreachable("unhandled operand type"); + } + } + } else { + // No src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } + } + } + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + 
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+  }
+
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+  }
+
+  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
+  // they have a src2 register operand that is tied to the dst operand.
+  // We do not allow modifiers for this operand in the assembler, so
+  // src2_modifiers should be 0.
+  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
+      Opc == AMDGPU::V_MAC_F32_e64_vi ||
+      Opc == AMDGPU::V_MAC_F16_e64_vi ||
+      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
+    auto it = Inst.begin();
+    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
+    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
+    ++it;
+    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+  }
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  cvtVOP3(Inst, Operands, OptionalIdx);
+}
+
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
+                               const OperandVector &Operands) {
+  OptionalImmIndexMap OptIdx;
+  const int Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
+
+  cvtVOP3(Inst, Operands, OptIdx);
+
+  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
+    assert(!IsPacked);
+    Inst.addOperand(Inst.getOperand(0));
+  }
+
+  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
+  // instruction, and then figure out where to actually put the modifiers.
+
+  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+
+  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+  if (OpSelHiIdx != -1) {
+    int DefaultVal = IsPacked ?
-1 : 0; + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, + DefaultVal); + } + + int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); + if (NegLoIdx != -1) { + assert(IsPacked); + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); + } + + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; + const int ModOps[] = { AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers }; + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + unsigned OpSelHi = 0; + unsigned NegLo = 0; + unsigned NegHi = 0; + + if (OpSelHiIdx != -1) { + OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); + } + + if (NegLoIdx != -1) { + int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); + NegLo = Inst.getOperand(NegLoIdx).getImm(); + NegHi = Inst.getOperand(NegHiIdx).getImm(); + } + + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + break; + + uint32_t ModVal = 0; + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + + if ((OpSelHi & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_1; + + if ((NegLo & (1 << J)) != 0) + ModVal |= SISrcMods::NEG; + + if ((NegHi & (1 << J)) != 0) + ModVal |= SISrcMods::NEG_HI; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + + Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); + } +} + +//===----------------------------------------------------------------------===// +// dpp +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isDPP8() const { + return isImmTy(ImmTyDPP8); +} + +bool AMDGPUOperand::isDPPCtrl() const { + using namespace AMDGPU::DPP; + + bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); + if (result) { + int64_t Imm = getImm(); + return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || + (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || + (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || + (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || + (Imm == DppCtrl::WAVE_SHL1) || + (Imm == DppCtrl::WAVE_ROL1) || + (Imm == DppCtrl::WAVE_SHR1) || + (Imm == DppCtrl::WAVE_ROR1) || + (Imm == DppCtrl::ROW_MIRROR) || + (Imm == DppCtrl::ROW_HALF_MIRROR) || + (Imm == DppCtrl::BCAST15) || + (Imm == DppCtrl::BCAST31) || + (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || + (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); + } + return false; +} + +//===----------------------------------------------------------------------===// +// mAI +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isBLGP() const { + return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); +} + +bool AMDGPUOperand::isCBSZ() const { + return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); +} + +bool AMDGPUOperand::isABID() const { + return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); +} + +bool AMDGPUOperand::isS16Imm() const { + return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); +} + +bool AMDGPUOperand::isU16Imm() const { + return isImm() && isUInt<16>(getImm()); +} + +OperandMatchResultTy 
AMDGPUAsmParser::parseDim(OperandVector &Operands) { + if (!isGFX10()) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + if (getLexer().getTok().getString() != "dim") + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + + // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an + // integer. + std::string Token; + if (getLexer().is(AsmToken::Integer)) { + SMLoc Loc = getLexer().getTok().getEndLoc(); + Token = getLexer().getTok().getString(); + Parser.Lex(); + if (getLexer().getTok().getLoc() != Loc) + return MatchOperand_ParseFail; + } + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_ParseFail; + Token += getLexer().getTok().getString(); + + StringRef DimId = Token; + if (DimId.startswith("SQ_RSRC_IMG_")) + DimId = DimId.substr(12); + + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); + if (!DimInfo) + return MatchOperand_ParseFail; + + Parser.Lex(); + + Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, + AMDGPUOperand::ImmTyDim)); + return MatchOperand_Success; +} + +OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + StringRef Prefix; + + if (getLexer().getKind() == AsmToken::Identifier) { + Prefix = Parser.getTok().getString(); + } else { + return MatchOperand_NoMatch; + } + + if (Prefix != "dpp8") + return parseDPPCtrl(Operands); + if (!isGFX10()) + return MatchOperand_NoMatch; + + // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] + + int64_t Sels[8]; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(Sels[0])) + return MatchOperand_ParseFail; + if (0 > Sels[0] || 7 < Sels[0]) + return MatchOperand_ParseFail; + + for (size_t i = 1; i < 8; ++i) { + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(Sels[i])) + return MatchOperand_ParseFail; + if (0 > Sels[i] || 7 < Sels[i]) + return MatchOperand_ParseFail; + } + + if (getLexer().isNot(AsmToken::RBrac)) + return MatchOperand_ParseFail; + Parser.Lex(); + + unsigned DPP8 = 0; + for (size_t i = 0; i < 8; ++i) + DPP8 |= (Sels[i] << (i * 3)); + + Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { + using namespace AMDGPU::DPP; + + SMLoc S = Parser.getTok().getLoc(); + StringRef Prefix; + int64_t Int; + + if (getLexer().getKind() == AsmToken::Identifier) { + Prefix = Parser.getTok().getString(); + } else { + return MatchOperand_NoMatch; + } + + if (Prefix == "row_mirror") { + Int = DppCtrl::ROW_MIRROR; + Parser.Lex(); + } else if (Prefix == "row_half_mirror") { + Int = DppCtrl::ROW_HALF_MIRROR; + Parser.Lex(); + } else { + // Check to prevent parseDPPCtrlOps from eating invalid tokens + if (Prefix != "quad_perm" + && Prefix != "row_shl" + && Prefix != "row_shr" + && Prefix != "row_ror" + && Prefix != "wave_shl" + && Prefix != "wave_rol" + && Prefix != "wave_shr" + && Prefix != "wave_ror" + && Prefix != "row_bcast" + && Prefix != "row_share" + && Prefix != "row_xmask") { + return MatchOperand_NoMatch; + } + 
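+    // At this point Prefix is a known dpp_ctrl variant whose argument still
+    // has to be parsed, e.g. "row_shl:1", "row_bcast:15" or
+    // "quad_perm:[0,1,2,3]". The checks below reject variants that are not
+    // supported on the current subtarget.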
+ if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) + return MatchOperand_NoMatch; + + if (!isVI() && !isGFX9() && + (Prefix == "wave_shl" || Prefix == "wave_shr" || + Prefix == "wave_rol" || Prefix == "wave_ror" || + Prefix == "row_bcast")) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + if (Prefix == "quad_perm") { + // quad_perm:[%d,%d,%d,%d] + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return MatchOperand_ParseFail; + Parser.Lex(); + + if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) + return MatchOperand_ParseFail; + + for (int i = 0; i < 3; ++i) { + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + + int64_t Temp; + if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) + return MatchOperand_ParseFail; + const int shift = i*2 + 2; + Int += (Temp << shift); + } + + if (getLexer().isNot(AsmToken::RBrac)) + return MatchOperand_ParseFail; + Parser.Lex(); + } else { + // sel:%d + Parser.Lex(); + if (getParser().parseAbsoluteExpression(Int)) + return MatchOperand_ParseFail; + + if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { + Int |= DppCtrl::ROW_SHL0; + } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { + Int |= DppCtrl::ROW_SHR0; + } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { + Int |= DppCtrl::ROW_ROR0; + } else if (Prefix == "wave_shl" && 1 == Int) { + Int = DppCtrl::WAVE_SHL1; + } else if (Prefix == "wave_rol" && 1 == Int) { + Int = DppCtrl::WAVE_ROL1; + } else if (Prefix == "wave_shr" && 1 == Int) { + Int = DppCtrl::WAVE_SHR1; + } else if (Prefix == "wave_ror" && 1 == Int) { + Int = DppCtrl::WAVE_ROR1; + } else if (Prefix == "row_bcast") { + if (Int == 15) { + Int = DppCtrl::BCAST15; + } else if (Int == 31) { + Int = DppCtrl::BCAST31; + } else { + return MatchOperand_ParseFail; + } + } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { + Int |= DppCtrl::ROW_SHARE_FIRST; + } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { + Int |= DppCtrl::ROW_XMASK_FIRST; + } else { + return MatchOperand_ParseFail; + } + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); + return MatchOperand_Success; +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { + return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { + return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); +} + +void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { + OptionalImmIndexMap OptionalIdx; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + int Fi = 0; + for (unsigned E = Operands.size(); I != E; ++I) { + auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), + MCOI::TIED_TO); + 
if (TiedTo != -1) { + assert((unsigned)TiedTo < Inst.getNumOperands()); + // handle tied old or src2 for MAC instructions + Inst.addOperand(Inst.getOperand(TiedTo)); + } + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (Op.isReg() && validateVccOperand(Op.getReg())) { + // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. + // Skip it. + continue; + } + + if (IsDPP8) { + if (Op.isDPP8()) { + Op.addImmOperands(Inst, 1); + } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegWithFPInputModsOperands(Inst, 2); + } else if (Op.isFI()) { + Fi = Op.getImm(); + } else if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + } else { + llvm_unreachable("Invalid operand type"); + } + } else { + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegWithFPInputModsOperands(Inst, 2); + } else if (Op.isDPPCtrl()) { + Op.addImmOperands(Inst, 1); + } else if (Op.isImm()) { + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("Invalid operand type"); + } + } + } + + if (IsDPP8) { + using namespace llvm::AMDGPU::DPP; + Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); + } else { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); + } + } +} + +//===----------------------------------------------------------------------===// +// sdwa +//===----------------------------------------------------------------------===// + +OperandMatchResultTy +AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, + AMDGPUOperand::ImmTy Type) { + using namespace llvm::AMDGPU::SDWA; + + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + OperandMatchResultTy res; + + res = parseStringWithPrefix(Prefix, Value); + if (res != MatchOperand_Success) { + return res; + } + + int64_t Int; + Int = StringSwitch<int64_t>(Value) + .Case("BYTE_0", SdwaSel::BYTE_0) + .Case("BYTE_1", SdwaSel::BYTE_1) + .Case("BYTE_2", SdwaSel::BYTE_2) + .Case("BYTE_3", SdwaSel::BYTE_3) + .Case("WORD_0", SdwaSel::WORD_0) + .Case("WORD_1", SdwaSel::WORD_1) + .Case("DWORD", SdwaSel::DWORD) + .Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { + using namespace llvm::AMDGPU::SDWA; + + SMLoc S = Parser.getTok().getLoc(); + StringRef Value; + OperandMatchResultTy res; + + res = parseStringWithPrefix("dst_unused", Value); + if (res != MatchOperand_Success) { + return res; + } + + int64_t Int; + Int = StringSwitch<int64_t>(Value) + .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) + .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) + .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) + .Default(0xffffffff); + Parser.Lex(); // eat last token + + if (Int == 0xffffffff) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); + return MatchOperand_Success; +} + +void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const 
OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); +} + +void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); +} + +void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); +} + +void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); +} + +void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, + uint64_t BasicInstType, bool skipVcc) { + using namespace llvm::AMDGPU::SDWA; + + OptionalImmIndexMap OptionalIdx; + bool skippedVcc = false; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (skipVcc && !skippedVcc && Op.isReg() && + (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { + // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. + // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) + // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. + // Skip VCC only if we didn't skip it on previous iteration. + if (BasicInstType == SIInstrFlags::VOP2 && + (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { + skippedVcc = true; + continue; + } else if (BasicInstType == SIInstrFlags::VOPC && + Inst.getNumOperands() == 0) { + skippedVcc = true; + continue; + } + } + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithInputModsOperands(Inst, 2); + } else if (Op.isImm()) { + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("Invalid operand type"); + } + skippedVcc = false; + } + + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments + switch (BasicInstType) { + case SIInstrFlags::VOP1: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); + break; + + case SIInstrFlags::VOP2: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); + break; + + case SIInstrFlags::VOPC: + if 
(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
+      break;
+
+    default:
+      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+    }
+  }
+
+  // Special case v_mac_{f16, f32}:
+  // they have a src2 register operand that is tied to the dst operand.
+  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
+    auto it = Inst.begin();
+    std::advance(
+        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAMDGPUAsmParser() {
+  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
+  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
+#include "AMDGPUGenAsmMatcher.inc"
+
+// This function should be defined after the auto-generated include so that we
+// have the MatchClassKind enum defined.
+unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
+                                                     unsigned Kind) {
+  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
+  // But MatchInstructionImpl() expects a token and fails to validate the
+  // operand. This method checks if we were given an immediate operand but
+  // expected the corresponding token.
+  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
+  switch (Kind) {
+  case MCK_addr64:
+    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
+  case MCK_gds:
+    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
+  case MCK_lds:
+    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
+  case MCK_glc:
+    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
+  case MCK_idxen:
+    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
+  case MCK_offen:
+    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
+  case MCK_SSrcB32:
+    // When operands have expression values, they will return true for isToken,
+    // because it is not possible to distinguish between a token and an
+    // expression at parse time. MatchInstructionImpl() will always try to
+    // match an operand as a token when isToken returns true, and when the
+    // name of the expression is not a valid token, the match will fail,
+    // so we need to handle it here.
+    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
+  case MCK_SSrcF32:
+    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
+  case MCK_SoppBrTarget:
+    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
+  case MCK_VReg32OrOff:
+    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
+  case MCK_InterpSlot:
+    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
+  case MCK_Attr:
+    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
+  case MCK_AttrChan:
+    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
+  default:
+    return Match_InvalidOperand;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// endpgm
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  int64_t Imm = 0;
+
+  if (!parseExpr(Imm)) {
+    // The operand is optional; if not present, default to 0.
+    Imm = 0;
+  }
+
+  if (!isUInt<16>(Imm)) {
+    Error(S, "expected a 16-bit value");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(
+      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
+  return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }