Diffstat (limited to 'lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 856
1 file changed, 702 insertions, 154 deletions
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 3cf9a1d924696..961f7186f3731 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -16,6 +16,7 @@ #include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" @@ -39,15 +40,12 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -56,7 +54,6 @@ #include <map> #include <memory> #include <string> -#include <vector> using namespace llvm; using namespace llvm::AMDGPU; @@ -83,7 +80,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { const AMDGPUAsmParser *AsmParser; public: - AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_) + AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} typedef std::unique_ptr<AMDGPUOperand> Ptr; @@ -160,7 +157,11 @@ public: ImmTySendMsg, ImmTyInterpSlot, ImmTyInterpAttr, - ImmTyAttrChan + ImmTyAttrChan, + ImmTyOpSel, + ImmTyOpSelHi, + ImmTyNegLo, + ImmTyNegHi }; struct TokOp { @@ -297,6 +298,10 @@ public: bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } + bool isOpSel() const { return isImmTy(ImmTyOpSel); } + bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } + bool isNegLo() const { return isImmTy(ImmTyNegLo); } + bool isNegHi() const { return isImmTy(ImmTyNegHi); } bool isMod() const { return isClampSI() || isOModSI(); @@ -316,6 +321,10 @@ public: return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); } + bool isSCSrcV2B16() const { + return isSCSrcB16(); + } + bool isSCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); } @@ -328,6 +337,10 @@ public: return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); } + bool isSCSrcV2F16() const { + return isSCSrcF16(); + } + bool isSCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); } @@ -344,6 +357,11 @@ public: return isSCSrcB16() || isLiteralImm(MVT::i16); } + bool isSSrcV2B16() const { + llvm_unreachable("cannot happen"); + return isSSrcB16(); + } + bool isSSrcB64() const { // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. // See isVSrc64(). 
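Aside on the packed-operand predicates added in this hunk (isSCSrcV2B16/isSCSrcV2F16, with VCSrc/VSrc counterparts further down): they all bottom out in the question of whether a value qualifies as a 16-bit inline constant. The following is a rough standalone sketch of that rule, not the real AMDGPU::isInlinableLiteral16/isInlinableLiteralV216 helpers; the accepted value set is the usual GCN one and should be treated as an assumption here.

#include <cstdint>

// Rough standalone model of what "inline constant" means for 16-bit and
// packed 16-bit operands: small integers, or a handful of fp16 bit patterns.
static bool isInlinable16(int16_t V, bool HasInv2Pi) {
  if (V >= -16 && V <= 64)
    return true;
  switch (static_cast<uint16_t>(V)) {
  case 0x3800: case 0xB800:   // +-0.5
  case 0x3C00: case 0xBC00:   // +-1.0
  case 0x4000: case 0xC000:   // +-2.0
  case 0x4400: case 0xC400:   // +-4.0
    return true;
  case 0x3118:                // ~1/(2*pi), only with the Inv2Pi feature
    return HasInv2Pi;
  default:
    return false;
  }
}

// A packed v2i16/v2f16 operand can use the inline-constant encoding only
// when both 16-bit halves hold the same inlinable value.
static bool isInlinableV216(uint32_t Packed, bool HasInv2Pi) {
  uint16_t Lo = Packed & 0xFFFF;
  uint16_t Hi = Packed >> 16;
  return Lo == Hi && isInlinable16(static_cast<int16_t>(Lo), HasInv2Pi);
}

int main() {
  // 0x3C003C00 packs fp16 1.0 into both halves -> inlinable.
  // 0x3C000000 has different halves -> must go out as a 32-bit literal.
  return (isInlinableV216(0x3C003C00, false) &&
          !isInlinableV216(0x3C000000, false)) ? 0 : 1;
}

The both-halves-equal requirement is also why the literal-conversion code later in this patch replicates a 16-bit immediate with ImmVal |= (ImmVal << 16) for the V2INT16/V2FP16 operand types.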
@@ -362,6 +380,11 @@ public: return isSCSrcB16() || isLiteralImm(MVT::f16); } + bool isSSrcV2F16() const { + llvm_unreachable("cannot happen"); + return isSSrcF16(); + } + bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } @@ -374,6 +397,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); } + bool isVCSrcV2B16() const { + return isVCSrcB16(); + } + bool isVCSrcF32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); } @@ -386,6 +413,10 @@ public: return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); } + bool isVCSrcV2F16() const { + return isVCSrcF16(); + } + bool isVSrcB32() const { return isVCSrcF32() || isLiteralImm(MVT::i32); } @@ -398,6 +429,11 @@ public: return isVCSrcF16() || isLiteralImm(MVT::i16); } + bool isVSrcV2B16() const { + llvm_unreachable("cannot happen"); + return isVSrcB16(); + } + bool isVSrcF32() const { return isVCSrcF32() || isLiteralImm(MVT::f32); } @@ -410,6 +446,11 @@ public: return isVCSrcF16() || isLiteralImm(MVT::f16); } + bool isVSrcV2F16() const { + llvm_unreachable("cannot happen"); + return isVSrcF16(); + } + bool isKImmFP32() const { return isLiteralImm(MVT::f32); } @@ -459,7 +500,7 @@ public: return Imm.Val; } - enum ImmTy getImmTy() const { + ImmTy getImmTy() const { assert(isImm()); return Imm.Type; } @@ -501,9 +542,11 @@ public: return getModifiers().hasIntModifiers(); } + uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; + void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; - void addLiteralImmOperand(MCInst &Inst, int64_t Val) const; + void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; template <unsigned Bitwidth> void addKImmFPOperands(MCInst &Inst, unsigned N) const; @@ -610,6 +653,10 @@ public: case ImmTyInterpSlot: OS << "InterpSlot"; break; case ImmTyInterpAttr: OS << "InterpAttr"; break; case ImmTyAttrChan: OS << "AttrChan"; break; + case ImmTyOpSel: OS << "OpSel"; break; + case ImmTyOpSelHi: OS << "OpSelHi"; break; + case ImmTyNegLo: OS << "NegLo"; break; + case ImmTyNegHi: OS << "NegHi"; break; } } @@ -636,7 +683,7 @@ public: static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, int64_t Val, SMLoc Loc, - enum ImmTy Type = ImmTyNone, + ImmTy Type = ImmTyNone, bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); Op->Imm.Val = Val; @@ -695,9 +742,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next // .amdgpu_hsa_kernel or at EOF. 
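applyInputFPModifiers(), declared in the operand class above and defined further down in this patch, applies abs/neg to a literal by editing the encoding's sign bit rather than by doing floating-point arithmetic. A minimal standalone illustration of that idea follows; the helper name and the demo values are illustrative only, not the parser's code.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// |x| clears the sign bit of the literal's encoding, -x flips it.
// Size is the operand size in bytes (2, 4 or 8), as in the patch.
static uint64_t applyFPModifiers(uint64_t Val, unsigned Size,
                                 bool Abs, bool Neg) {
  assert(Size == 2 || Size == 4 || Size == 8);
  const uint64_t SignMask = 1ULL << (Size * 8 - 1);
  if (Abs)
    Val &= ~SignMask;   // |x|
  if (Neg)
    Val ^= SignMask;    // -x, applied after abs
  return Val;
}

int main() {
  float F = -1.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t OutBits =
      static_cast<uint32_t>(applyFPModifiers(Bits, 4, /*Abs=*/true, /*Neg=*/false));
  float G;
  std::memcpy(&G, &OutBits, sizeof(G));
  std::printf("|%g| -> %g\n", F, G);  // prints |-1.5| -> 1.5
  return 0;
}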
class KernelScopeInfo { - int SgprIndexUnusedMin; - int VgprIndexUnusedMin; - MCContext *Ctx; + int SgprIndexUnusedMin = -1; + int VgprIndexUnusedMin = -1; + MCContext *Ctx = nullptr; void usesSgprAt(int i) { if (i >= SgprIndexUnusedMin) { @@ -708,6 +755,7 @@ class KernelScopeInfo { } } } + void usesVgprAt(int i) { if (i >= VgprIndexUnusedMin) { VgprIndexUnusedMin = ++i; @@ -717,14 +765,16 @@ class KernelScopeInfo { } } } + public: - KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr) - {} + KernelScopeInfo() = default; + void initialize(MCContext &Context) { Ctx = &Context; usesSgprAt(SgprIndexUnusedMin = -1); usesVgprAt(VgprIndexUnusedMin = -1); } + void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { switch (RegKind) { case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; @@ -738,9 +788,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser { const MCInstrInfo &MII; MCAsmParser &Parser; - unsigned ForcedEncodingSize; - bool ForcedDPP; - bool ForcedSDWA; + unsigned ForcedEncodingSize = 0; + bool ForcedDPP = false; + bool ForcedSDWA = false; KernelScopeInfo KernelScope; /// @name Auto-generated Match Functions @@ -756,7 +806,7 @@ private: bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); bool ParseDirectiveHSACodeObjectVersion(); bool ParseDirectiveHSACodeObjectISA(); - bool ParseDirectiveRuntimeMetadata(); + bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); bool ParseSectionDirectiveHSAText(); @@ -767,44 +817,52 @@ private: bool ParseSectionDirectiveHSADataGlobalAgent(); bool ParseSectionDirectiveHSADataGlobalProgram(); bool ParseSectionDirectiveHSARodataReadonlyAgent(); - bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); - bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex); - void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn); + bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum); + bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, + unsigned& RegNum, unsigned& RegWidth, + unsigned *DwordRegIndex); + void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic, bool IsAtomicReturn); + void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded); public: enum AMDGPUMatchResultTy { Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY }; + typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; + AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser), - ForcedEncodingSize(0), - ForcedDPP(false), - ForcedSDWA(false) { + : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) { MCAsmParserExtension::Initialize(Parser); - if (getSTI().getFeatureBits().none()) { + if (getFeatureBits().none()) { // Set default features. copySTI().ToggleFeature("SOUTHERN_ISLANDS"); } - setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); { // TODO: make those pre-defined variables read-only. // Currently there is none suitable machinery in the core llvm-mc for this. 
// MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); MCContext &Ctx = getContext(); - MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx)); + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } KernelScope.initialize(getContext()); } @@ -822,7 +880,7 @@ public: } bool hasInv2PiInlineImm() const { - return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; + return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } bool hasSGPR102_SGPR103() const { @@ -844,6 +902,10 @@ public: return &MII; } + const FeatureBitset &getFeatureBits() const { + return getSTI().getFeatureBits(); + } + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } @@ -871,19 +933,28 @@ public: //bool ProcessInstruction(MCInst &Inst); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, bool (*ConvertResult)(int64_t &) = nullptr); + + OperandMatchResultTy parseOperandArrayWithPrefix( + const char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + bool (*ConvertResult)(int64_t&) = nullptr); + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); + AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value); - OperandMatchResultTy parseImm(OperandVector &Operands); + bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false); + OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false); OperandMatchResultTy parseReg(OperandVector &Operands); - OperandMatchResultTy parseRegOrImm(OperandVector &Operands); + OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); @@ -891,7 +962,8 @@ public: OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); - void cvtDS(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } + void cvtDSGds(MCInst &Inst, const OperandVector 
&Operands) { cvtDSImpl(Inst, Operands, true); } void cvtExp(MCInst &Inst, const OperandVector &Operands); bool parseCnt(int64_t &IntVal); @@ -911,6 +983,12 @@ private: void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); + bool validateOperandLimitations(const MCInst &Inst); + bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); + bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; + unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; + bool isSGPR(unsigned Reg); + public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -940,7 +1018,13 @@ public: void cvtId(MCInst &Inst, const OperandVector &Operands); void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); + + void cvtVOP3Impl(MCInst &Inst, + const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); void cvtMIMG(MCInst &Inst, const OperandVector &Operands); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); @@ -988,6 +1072,30 @@ static const fltSemantics *getFltSemantics(MVT VT) { return getFltSemantics(VT.getSizeInBits() / 8); } +static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { + switch (OperandType) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + return &APFloat::IEEEsingle(); + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + return &APFloat::IEEEdouble(); + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + return &APFloat::IEEEhalf(); + default: + llvm_unreachable("unsupported fp type"); + } +} + //===----------------------------------------------------------------------===// // Operand //===----------------------------------------------------------------------===// @@ -1031,13 +1139,18 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { if (!canLosslesslyConvertToFPType(FPLiteral, type)) return false; + if (type.getScalarSizeInBits() == 16) { + return AMDGPU::isInlinableLiteral16( + static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), + AsmParser->hasInv2PiInlineImm()); + } + // Check if single precision literal is inlinable return AMDGPU::isInlinableLiteral32( static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), AsmParser->hasInv2PiInlineImm()); } - // We got int literal token. if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand return AMDGPU::isInlinableLiteral64(Imm.Val, @@ -1064,6 +1177,13 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { if (!Imm.IsFPImm) { // We got int literal token. + if (type == MVT::f64 && hasFPModifiers()) { + // Cannot apply fp modifiers to int literals preserving the same semantics + // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, + // disable these cases. 
+ return false; + } + unsigned Size = type.getSizeInBits(); if (Size == 64) Size = 32; @@ -1093,40 +1213,57 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const { return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } -void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { - int64_t Val = Imm.Val; - if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) { - // Apply modifiers to immediate value. Only negate can get here - if (Imm.IsFPImm) { - APFloat F(BitsToDouble(Val)); - F.changeSign(); - Val = F.bitcastToAPInt().getZExtValue(); - } else { - Val = -Val; - } +uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const +{ + assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); + assert(Size == 2 || Size == 4 || Size == 8); + + const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); + + if (Imm.Mods.Abs) { + Val &= ~FpSignMask; } + if (Imm.Mods.Neg) { + Val ^= FpSignMask; + } + + return Val; +} + +void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) { - addLiteralImmOperand(Inst, Val); + addLiteralImmOperand(Inst, Imm.Val, + ApplyModifiers & + isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); } else { - Inst.addOperand(MCOperand::createImm(Val)); + assert(!isImmTy(ImmTyNone) || !hasModifiers()); + Inst.addOperand(MCOperand::createImm(Imm.Val)); } } -void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { +void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); auto OpNum = Inst.getNumOperands(); // Check that this operand accepts literals assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); - auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size + if (ApplyModifiers) { + assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); + const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); + Val = applyInputFPModifiers(Val, Size); + } + + APInt Literal(64, Val); + uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; if (Imm.IsFPImm) { // We got fp literal token - APInt Literal(64, Val); - - switch (OpSize) { - case 8: { + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: { if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); @@ -1151,16 +1288,31 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { // in predicate methods (isLiteralImm()) llvm_unreachable("fp literal in 64-bit integer instruction."); } - case 4: - case 2: { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision - FPLiteral.convert(*getFltSemantics(OpSize), + FPLiteral.convert(*getOpFltSemantics(OpTy), APFloat::rmNearestTiesToEven, &lost); // We allow precision lost but not overflow or underflow. This should be // checked earlier in isLiteralImm() - Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); + + uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); + if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || + OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + ImmVal |= (ImmVal << 16); + } + + Inst.addOperand(MCOperand::createImm(ImmVal)); return; } default: @@ -1173,8 +1325,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { // We got int literal token. // Only sign extend inline immediates. 
// FIXME: No errors on truncation - switch (OpSize) { - case 4: { + switch (OpTy) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: { if (isInt<32>(Val) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -1185,9 +1340,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); return; } - case 8: { - if (AMDGPU::isInlinableLiteral64(Val, - AsmParser->hasInv2PiInlineImm())) { + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: { + if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -1195,7 +1352,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Lo_32(Val))); return; } - case 2: { + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: { if (isInt<16>(Val) && AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -1206,6 +1366,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { Inst.addOperand(MCOperand::createImm(Val & 0xffff)); return; } + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); + assert(AMDGPU::isInlinableLiteral16(LiteralVal, + AsmParser->hasInv2PiInlineImm())); + + uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | + static_cast<uint32_t>(LiteralVal); + Inst.addOperand(MCOperand::createImm(ImmVal)); + return; + } default: llvm_unreachable("invalid operand size"); } @@ -1289,7 +1460,8 @@ static unsigned getSpecialRegForName(StringRef RegName) { .Default(0); } -bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { +bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { auto R = parseRegister(); if (!R) return true; assert(R->isReg()); @@ -1299,20 +1471,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End return false; } -bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum) -{ +bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, + RegisterKind RegKind, unsigned Reg1, + unsigned RegNum) { switch (RegKind) { case IS_SPECIAL: - if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; } - if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; } - if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; } - if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; } - if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; } + if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { + Reg = AMDGPU::EXEC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { + Reg = AMDGPU::FLAT_SCR; + RegWidth = 2; + return true; + } 
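AddNextRegisterToList(), reformatted in this hunk, encodes two rules: known special pairs (EXEC_LO/EXEC_HI and FLAT_SCR_LO/HI here, with the VCC/TBA/TMA cases continuing below) merge into their 64-bit super-register, while VGPR/SGPR/TTMP register lists must name strictly consecutive registers. A toy model of the consecutive-register case, using illustrative names rather than the parser's types:

#include <cstdio>
#include <initializer_list>

// Each register appended to a list such as [s4, s5, s6, s7] must be exactly
// Base + Width; the accumulated width then grows by one, as in the parser.
struct RegRange {
  unsigned Base = 0;   // first register index in the range
  unsigned Width = 0;  // number of consecutive registers seen so far
};

static bool appendReg(RegRange &R, unsigned RegIndex) {
  if (R.Width == 0) {              // first element starts the range
    R.Base = RegIndex;
    R.Width = 1;
    return true;
  }
  if (RegIndex != R.Base + R.Width)
    return false;                  // gap or reordering: reject
  ++R.Width;
  return true;
}

int main() {
  RegRange R;
  for (unsigned Idx : {4u, 5u, 6u, 7u})
    if (!appendReg(R, Idx))
      return 1;
  std::printf("s[%u:%u]\n", R.Base, R.Base + R.Width - 1);  // s[4:7]
  return 0;
}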
+ if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { + Reg = AMDGPU::VCC; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { + Reg = AMDGPU::TBA; + RegWidth = 2; + return true; + } + if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { + Reg = AMDGPU::TMA; + RegWidth = 2; + return true; + } return false; case IS_VGPR: case IS_SGPR: case IS_TTMP: - if (Reg1 != Reg + RegWidth) { return false; } + if (Reg1 != Reg + RegWidth) { + return false; + } RegWidth++; return true; default: @@ -1320,8 +1515,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R } } -bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex) -{ +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, + unsigned &RegNum, unsigned &RegWidth, + unsigned *DwordRegIndex) { if (DwordRegIndex) { *DwordRegIndex = 0; } const MCRegisterInfo *TRI = getContext().getRegisterInfo(); if (getLexer().is(AsmToken::Identifier)) { @@ -1462,8 +1658,33 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); } +bool +AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) { + if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) && + (getLexer().getKind() == AsmToken::Integer || + getLexer().getKind() == AsmToken::Real)) { + + // This is a workaround for handling operands like these: + // |1.0| + // |-1| + // This syntax is not compatible with syntax of standard + // MC expressions (due to the trailing '|'). + + SMLoc EndLoc; + const MCExpr *Expr; + + if (getParser().parsePrimaryExpr(Expr, EndLoc)) { + return true; + } + + return !Expr->evaluateAsAbsolute(Val); + } + + return getParser().parseAbsoluteExpression(Val); +} + OperandMatchResultTy -AMDGPUAsmParser::parseImm(OperandVector &Operands) { +AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { // TODO: add syntactic sugar for 1/(2*PI) bool Minus = false; if (getLexer().getKind() == AsmToken::Minus) { @@ -1475,7 +1696,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) { switch(getLexer().getKind()) { case AsmToken::Integer: { int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) + if (parseAbsoluteExpr(IntVal, AbsMod)) return MatchOperand_ParseFail; if (Minus) IntVal *= -1; @@ -1484,7 +1705,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) { } case AsmToken::Real: { int64_t IntVal; - if (getParser().parseAbsoluteExpression(IntVal)) + if (parseAbsoluteExpr(IntVal, AbsMod)) return MatchOperand_ParseFail; APFloat F(BitsToDouble(IntVal)); @@ -1512,8 +1733,8 @@ AMDGPUAsmParser::parseReg(OperandVector &Operands) { } OperandMatchResultTy -AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) { - auto res = parseImm(Operands); +AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { + auto res = parseImm(Operands, AbsMod); if (res != MatchOperand_NoMatch) { return res; } @@ -1522,18 +1743,50 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) { } OperandMatchResultTy -AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) { - // XXX: During parsing we can't determine if minus sign means - // negate-modifier or negative immediate value. - // By default we suppose it is modifier. 
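The hunk that follows makes '-' in front of an integer or floating-point literal mean arithmetic negation of the literal rather than the floating-point NEG source modifier, and adds an explicit neg(...) syntax for the modifier. The distinction matters because the two forms encode differently; a small plain-C++ demonstration of the two values quoted in the comment below (0xFFFFFFFF vs 0x80000001 for "-1"):

#include <cstdint>
#include <cstdio>

int main() {
  // "-1" read as an integer literal and then negated: all bits set.
  uint32_t AsIntNegation = static_cast<uint32_t>(-1);   // 0xFFFFFFFF

  // "1" read as a literal with the floating-point NEG modifier applied to
  // its bit pattern: only the sign bit changes.
  uint32_t AsNegModifier = 1u ^ 0x80000000u;            // 0x80000001

  std::printf("integer negation: 0x%08X\n", AsIntNegation);
  std::printf("NEG modifier:     0x%08X\n", AsNegModifier);
  return 0;
}

With the new rule, both the VOP1/2/C and VOP3 forms of v_exp_f32 v5, -1 carry the literal 0xFFFFFFFF, and the neg(...) syntax remains available when sign-bit negation of a literal is actually intended.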
- bool Negate = false, Abs = false, Abs2 = false; +AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, + bool AllowImm) { + bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; if (getLexer().getKind()== AsmToken::Minus) { + const AsmToken NextToken = getLexer().peekTok(); + + // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. + if (NextToken.is(AsmToken::Minus)) { + Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); + return MatchOperand_ParseFail; + } + + // '-' followed by an integer literal N should be interpreted as integer + // negation rather than a floating-point NEG modifier applied to N. + // Beside being contr-intuitive, such use of floating-point NEG modifier + // results in different meaning of integer literals used with VOP1/2/C + // and VOP3, for example: + // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF + // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 + // Negative fp literals should be handled likewise for unifomtity + if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) { + Parser.Lex(); + Negate = true; + } + } + + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == "neg") { + if (Negate) { + Error(Parser.getTok().getLoc(), "expected register or immediate"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + Negate2 = true; + if (getLexer().isNot(AsmToken::LParen)) { + Error(Parser.getTok().getLoc(), "expected left paren after neg"); + return MatchOperand_ParseFail; + } Parser.Lex(); - Negate = true; } - if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") { + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == "abs") { Parser.Lex(); Abs2 = true; if (getLexer().isNot(AsmToken::LParen)) { @@ -1554,7 +1807,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo OperandMatchResultTy Res; if (AllowImm) { - Res = parseRegOrImm(Operands); + Res = parseRegOrImm(Operands, Abs); } else { Res = parseReg(Operands); } @@ -1563,9 +1816,6 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo } AMDGPUOperand::Modifiers Mods; - if (Negate) { - Mods.Neg = true; - } if (Abs) { if (getLexer().getKind() != AsmToken::Pipe) { Error(Parser.getTok().getLoc(), "expected vertical bar"); @@ -1583,6 +1833,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo Mods.Abs = true; } + if (Negate) { + Mods.Neg = true; + } else if (Negate2) { + if (getLexer().isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "expected closing parentheses"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + Mods.Neg = true; + } + if (Mods.hasFPModifiers()) { AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); Op.setModifiers(Mods); @@ -1591,10 +1852,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo } OperandMatchResultTy -AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) { +AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, + bool AllowImm) { bool Sext = false; - if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") { + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == "sext") { Parser.Lex(); Sext = true; if (getLexer().isNot(AsmToken::LParen)) { @@ -1661,7 +1924,6 @@ OperandMatchResultTy 
AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) } unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { - uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || @@ -1719,6 +1981,128 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { return makeArrayRef(Variants); } +unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + const unsigned Num = Desc.getNumImplicitUses(); + for (unsigned i = 0; i < Num; ++i) { + unsigned Reg = Desc.ImplicitUses[i]; + switch (Reg) { + case AMDGPU::FLAT_SCR: + case AMDGPU::VCC: + case AMDGPU::M0: + return Reg; + default: + break; + } + } + return AMDGPU::NoRegister; +} + +bool AMDGPUAsmParser::isSGPR(unsigned Reg) { + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); + const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); + return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || + Reg == AMDGPU::SCC; +} + +// NB: This code is correct only when used to check constant +// bus limitations because GFX7 support no f16 inline constants. +// Note that there are no cases when a GFX7 opcode violates +// constant bus limitations due to the use of an f16 constant. +bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, + unsigned OpIdx) const { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + + if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { + return false; + } + + const MCOperand &MO = Inst.getOperand(OpIdx); + + int64_t Val = MO.getImm(); + auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); + + switch (OpSize) { // expected operand size + case 8: + return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); + case 4: + return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); + case 2: { + const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; + if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); + } else { + return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); + } + } + default: + llvm_unreachable("invalid operand size"); + } +} + +bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { + const MCOperand &MO = Inst.getOperand(OpIdx); + if (MO.isImm()) { + return !isInlineConstant(Inst, OpIdx); + } + return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg())); +} + +bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + unsigned ConstantBusUseCount = 0; + + if (Desc.TSFlags & + (SIInstrFlags::VOPC | + SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) { + + // Check special imm operands (used by madmk, etc) + if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { + ++ConstantBusUseCount; + } + + unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); + if (SGPRUsed != AMDGPU::NoRegister) { + ++ConstantBusUseCount; + } + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + for (int OpIdx : OpIndices) { + if 
(OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (usesConstantBus(Inst, OpIdx)) { + if (MO.isReg()) { + const unsigned Reg = mc2PseudoReg(MO.getReg()); + // Pairs of registers with a partial intersections like these + // s0, s[0:1] + // flat_scratch_lo, flat_scratch + // flat_scratch_lo, flat_scratch_hi + // are theoretically valid but they are disabled anyway. + // Note that this code mimics SIInstrInfo::verifyInstruction + if (Reg != SGPRUsed) { + ++ConstantBusUseCount; + } + SGPRUsed = Reg; + } else { // Expression or a literal + ++ConstantBusUseCount; + } + } + } + } + + return ConstantBusUseCount <= 1; +} + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -1751,6 +2135,10 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (Result) { default: break; case Match_Success: + if (!validateOperandLimitations(Inst)) { + return Error(IDLoc, + "invalid operand (violates constant bus restrictions)"); + } Inst.setLoc(IDLoc); Out.EmitInstruction(Inst, getSTI()); return false; @@ -1793,7 +2181,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { return false; } - bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor) { if (ParseAsAbsoluteExpression(Major)) @@ -1810,7 +2197,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, } bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { - uint32_t Major; uint32_t Minor; @@ -1831,9 +2217,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); - getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, - Isa.Stepping, + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, + ISA.Stepping, "AMD", "AMDGPU"); return false; } @@ -1873,42 +2260,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { return false; } -bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() { - std::string Metadata; - raw_string_ostream MS(Metadata); +bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() { + std::string YamlString; + raw_string_ostream YamlStream(YamlString); getLexer().setSkipSpace(false); bool FoundEnd = false; while (!getLexer().is(AsmToken::Eof)) { while (getLexer().is(AsmToken::Space)) { - MS << ' '; + YamlStream << getLexer().getTok().getString(); Lex(); } if (getLexer().is(AsmToken::Identifier)) { StringRef ID = getLexer().getTok().getIdentifier(); - if (ID == ".end_amdgpu_runtime_metadata") { + if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) { Lex(); FoundEnd = true; break; } } - MS << Parser.parseStringToEndOfStatement() - << getContext().getAsmInfo()->getSeparatorString(); + YamlStream << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); Parser.eatToEndOfStatement(); } getLexer().setSkipSpace(true); - if (getLexer().is(AsmToken::Eof) && !FoundEnd) - return TokError("expected directive .end_amdgpu_runtime_metadata not found"); + if (getLexer().is(AsmToken::Eof) && !FoundEnd) { + return TokError( + "expected directive .end_amdgpu_code_object_metadata not found"); + } - MS.flush(); + YamlStream.flush(); - getTargetStreamer().EmitRuntimeMetadata(Metadata); + if 
(!getTargetStreamer().EmitCodeObjectMetadata(YamlString)) + return Error(getParser().getTok().getLoc(), "invalid code object metadata"); return false; } @@ -1926,7 +2316,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -2020,8 +2410,8 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".hsa_code_object_isa") return ParseDirectiveHSACodeObjectISA(); - if (IDVal == ".amdgpu_runtime_metadata") - return ParseDirectiveRuntimeMetadata(); + if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin) + return ParseDirectiveCodeObjectMetadata(); if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); @@ -2080,7 +2470,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { - // Try to parse with a custom parser OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); @@ -2208,7 +2597,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy, + AMDGPUOperand::ImmTy ImmTy, bool (*ConvertResult)(int64_t&)) { SMLoc S = Parser.getTok().getLoc(); int64_t Value = 0; @@ -2225,9 +2614,59 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, return MatchOperand_Success; } +OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( + const char *Prefix, + OperandVector &Operands, + AMDGPUOperand::ImmTy ImmTy, + bool (*ConvertResult)(int64_t&)) { + StringRef Name = Parser.getTok().getString(); + if (!Name.equals(Prefix)) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return MatchOperand_ParseFail; + Parser.Lex(); + + unsigned Val = 0; + SMLoc S = Parser.getTok().getLoc(); + + // FIXME: How to verify the number of elements matches the number of src + // operands? 
+ for (int I = 0; I < 3; ++I) { + if (I != 0) { + if (getLexer().is(AsmToken::RBrac)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + } + + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + int64_t Op; + if (getParser().parseAbsoluteExpression(Op)) + return MatchOperand_ParseFail; + + if (Op != 0 && Op != 1) + return MatchOperand_ParseFail; + Val |= (Op << I); + } + + Parser.Lex(); + Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); + return MatchOperand_Success; +} + OperandMatchResultTy AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy) { + AMDGPUOperand::ImmTy ImmTy) { int64_t Bit = 0; SMLoc S = Parser.getTok().getLoc(); @@ -2257,11 +2696,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, return MatchOperand_Success; } -typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; - -void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, - OptionalImmIndexMap& OptionalIdx, - enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) { +static void addOptionalImmOperand( + MCInst& Inst, const OperandVector& Operands, + AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, + AMDGPUOperand::ImmTy ImmT, + int64_t Default = 0) { auto i = OptionalIdx.find(ImmT); if (i != OptionalIdx.end()) { unsigned Idx = i->second; @@ -2323,9 +2762,9 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 } -void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { - std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; - bool GDSOnly = false; +void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, + bool IsGdsHardcoded) { + OptionalImmIndexMap OptionalIdx; for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -2337,7 +2776,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { } if (Op.isToken() && Op.getToken() == "gds") { - GDSOnly = true; + IsGdsHardcoded = true; continue; } @@ -2346,9 +2785,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); - - if (!GDSOnly) { + if (!IsGdsHardcoded) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 @@ -2421,13 +2858,14 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) Parser.Lex(); - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); if (CntName == "vmcnt") - IntVal = encodeVmcnt(IV, IntVal, CntVal); + IntVal = encodeVmcnt(ISA, IntVal, CntVal); else if (CntName == "expcnt") - IntVal = encodeExpcnt(IV, IntVal, CntVal); + IntVal = encodeExpcnt(ISA, IntVal, CntVal); else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(IV, IntVal, CntVal); + IntVal = encodeLgkmcnt(ISA, IntVal, CntVal); else return true; @@ -2436,8 +2874,9 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - int64_t Waitcnt = 
getWaitcntBitMask(IV); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { @@ -2459,7 +2898,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { return MatchOperand_Success; } -bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) { +bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, + int64_t &Width) { using namespace llvm::AMDGPU::Hwreg; if (Parser.getTok().getString() != "hwreg") @@ -2520,8 +2960,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, return false; } -OperandMatchResultTy -AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { +OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { using namespace llvm::AMDGPU::Hwreg; int64_t Imm16Val = 0; @@ -3170,6 +3609,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, + {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, + {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, + {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, + {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} }; OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { @@ -3186,6 +3629,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan res = parseSDWASel(Operands, Op.Name, Op.Type); } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { res = parseSDWADstUnused(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || + Op.Type == AMDGPUOperand::ImmTyOpSelHi || + Op.Type == AMDGPUOperand::ImmTyNegLo || + Op.Type == AMDGPUOperand::ImmTyNegHi) { + res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, + Op.ConvertResult); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); } @@ -3241,8 +3690,8 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; } -void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - OptionalImmIndexMap OptionalIdx; +void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { @@ -3253,12 +3702,20 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { Op.addRegOrImmWithFPInputModsOperands(Inst, 2); - } else if (Op.isImm()) { + } else if (Op.isImmModifier()) { OptionalIdx[Op.getImmTy()] = I; + } else if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); } else { llvm_unreachable("unhandled operand type"); } } +} + +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + cvtVOP3Impl(Inst, Operands, OptionalIdx); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); @@ -3283,6 +3740,96 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const 
OperandVector &Operands) { } } +void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); +} + +void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptIdx; + + cvtVOP3Impl(Inst, Operands, OptIdx); + + // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 + // instruction, and then figure out where to actually put the modifiers + int Opc = Inst.getOpcode(); + + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); + } + + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1); + + int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); + if (NegLoIdx != -1) { + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); + } + + const int Ops[] = { AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; + const int ModOps[] = { AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers }; + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); + + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); + unsigned NegLo = 0; + unsigned NegHi = 0; + + if (NegLoIdx != -1) { + int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); + NegLo = Inst.getOperand(NegLoIdx).getImm(); + NegHi = Inst.getOperand(NegHiIdx).getImm(); + } + + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + break; + + uint32_t ModVal = 0; + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + + if ((OpSelHi & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_1; + + if ((NegLo & (1 << J)) != 0) + ModVal |= SISrcMods::NEG; + + if ((NegHi & (1 << J)) != 0) + ModVal |= SISrcMods::NEG_HI; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + + Inst.getOperand(ModIdx).setImm(ModVal); + } +} + //===----------------------------------------------------------------------===// // dpp //===----------------------------------------------------------------------===// @@ -3436,7 +3983,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); // Add the register arguments if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { - // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token. + // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. // Skip it. 
continue; } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { @@ -3547,6 +4094,7 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, uint64_t BasicInstType) { + using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; unsigned I = 1; @@ -3581,21 +4129,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, // V_NOP_sdwa_vi has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOPC: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; default: |
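For reference, the op_sel/op_sel_hi/neg_lo/neg_hi repacking performed by cvtVOP3P earlier in this patch can be pictured in isolation: each source j takes one bit from each of the four arrays and ORs the corresponding flag into its srcN_modifiers operand. The flag constants below are stand-ins for illustration; the real bit positions are the SISrcMods values in SIDefines.h and are not assumed here.

#include <cstdint>
#include <cstdio>

// Stand-in modifier flags (illustrative bit positions only).
enum : uint32_t {
  MOD_NEG      = 1u << 0,
  MOD_NEG_HI   = 1u << 1,
  MOD_OP_SEL_0 = 1u << 2,
  MOD_OP_SEL_1 = 1u << 3,
};

// Given the parsed op_sel:[..], op_sel_hi:[..], neg_lo:[..], neg_hi:[..]
// bit vectors, build the per-source modifier word for up to three sources,
// one bit from each array per source -- the same walk cvtVOP3P performs.
static void packVOP3PMods(unsigned OpSel, unsigned OpSelHi,
                          unsigned NegLo, unsigned NegHi,
                          unsigned NumSrcs, uint32_t Mods[3]) {
  for (unsigned J = 0; J < NumSrcs && J < 3; ++J) {
    uint32_t M = 0;
    if (OpSel   & (1u << J)) M |= MOD_OP_SEL_0;
    if (OpSelHi & (1u << J)) M |= MOD_OP_SEL_1;
    if (NegLo   & (1u << J)) M |= MOD_NEG;
    if (NegHi   & (1u << J)) M |= MOD_NEG_HI;
    Mods[J] = M;
  }
}

int main() {
  // op_sel:[0,1] op_sel_hi:[0,0] neg_lo:[1,0] neg_hi:[0,0]
  uint32_t Mods[3] = {};
  packVOP3PMods(/*OpSel=*/0b10, /*OpSelHi=*/0, /*NegLo=*/0b01, /*NegHi=*/0,
                /*NumSrcs=*/2, Mods);
  std::printf("src0_modifiers=0x%X src1_modifiers=0x%X\n", Mods[0], Mods[1]);
  return 0;  // prints src0_modifiers=0x1 src1_modifiers=0x4
}

So op_sel:[0,1] with neg_lo:[1,0] marks src0 with NEG and src1 with OP_SEL_0, which is then written back into the src0_modifiers/src1_modifiers operands of the MCInst.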