diff options
Diffstat (limited to 'lib/Target/X86/AsmParser/X86AsmParser.cpp')
| -rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 1106 |
1 files changed, 619 insertions, 487 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c1d216c8b7af..87c65347e334 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "InstPrinter/X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86TargetStreamer.h" #include "X86AsmInstrumentation.h" #include "X86AsmParserCommon.h" #include "X86Operand.h" @@ -37,6 +39,14 @@ using namespace llvm; +static bool checkScale(unsigned Scale, StringRef &ErrMsg) { + if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { + ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; + return true; + } + return false; +} + namespace { static const char OpPrecedence[] = { @@ -59,7 +69,6 @@ static const char OpPrecedence[] = { }; class X86AsmParser : public MCTargetAsmParser { - const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; std::unique_ptr<X86AsmInstrumentation> Instrumentation; bool Code16GCC; @@ -72,6 +81,13 @@ private: return Result; } + X86TargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<X86TargetStreamer &>(TS); + } + unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, uint64_t &ErrorInfo, bool matchingInlineAsm, unsigned VariantID = 0) { @@ -125,8 +141,8 @@ private: int64_t popOperand() { assert (!PostfixStack.empty() && "Poped an empty stack!"); ICToken Op = PostfixStack.pop_back_val(); - assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) - && "Expected and immediate or register!"); + if (!(Op.first == IC_IMM || Op.first == IC_REGISTER)) + return -1; // The invalid Scale value will be caught later by checkScale return Op.second; } void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { @@ -293,6 +309,7 @@ private: }; enum IntelExprState { + IES_INIT, IES_OR, IES_XOR, IES_AND, @@ -320,16 +337,20 @@ private: int64_t Imm; const MCExpr *Sym; StringRef SymName; - bool StopOnLBrac, AddImmPrefix; InfixCalculator IC; InlineAsmIdentifierInfo Info; + short BracCount; + bool MemExpr; public: - IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : - State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), - Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), - AddImmPrefix(addimmprefix) { Info.clear(); } + IntelExprStateMachine() + : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), + TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0), + MemExpr(false) {} + void addImm(int64_t imm) { Imm += imm; } + short getBracCount() { return BracCount; } + bool isMemExpr() { return MemExpr; } unsigned getBaseReg() { return BaseReg; } unsigned getIndexReg() { return IndexReg; } unsigned getScale() { return Scale; } @@ -339,13 +360,8 @@ private: bool isValidEndState() { return State == IES_RBRAC || State == IES_INTEGER; } - bool getStopOnLBrac() { return StopOnLBrac; } - bool getAddImmPrefix() { return AddImmPrefix; } bool hadError() { return State == IES_ERROR; } - - InlineAsmIdentifierInfo &getIdentifierInfo() { - return Info; - } + InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; } void onOr() { IntelExprState CurrState = State; @@ -422,7 +438,7 @@ private: } PrevState = CurrState; } - void onPlus() { + bool onPlus(StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { default: @@ -439,7 +455,10 @@ private: if (!BaseReg) { BaseReg = TmpReg; } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); + if (IndexReg) { + ErrMsg = "BaseReg/IndexReg already set!"; + return true; + } IndexReg = TmpReg; Scale = 1; } @@ -447,8 +466,9 @@ private: break; } PrevState = CurrState; + return false; } - void onMinus() { + bool onMinus(StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { default: @@ -470,12 +490,17 @@ private: case IES_RBRAC: case IES_INTEGER: case IES_REGISTER: + case IES_INIT: State = IES_MINUS; // push minus operator if it is not a negate operator if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || CurrState == IES_INTEGER || CurrState == IES_RBRAC) IC.pushOperator(IC_MINUS); - else + else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { + // We have negate operator for Scale: it's illegal + ErrMsg = "Scale can't be negative"; + return true; + } else IC.pushOperator(IC_NEG); if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with @@ -483,7 +508,10 @@ private: if (!BaseReg) { BaseReg = TmpReg; } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); + if (IndexReg) { + ErrMsg = "BaseReg/IndexReg already set!"; + return true; + } IndexReg = TmpReg; Scale = 1; } @@ -491,6 +519,7 @@ private: break; } PrevState = CurrState; + return false; } void onNot() { IntelExprState CurrState = State; @@ -511,13 +540,15 @@ private: case IES_MOD: case IES_LPAREN: case IES_LBRAC: + case IES_INIT: State = IES_NOT; IC.pushOperator(IC_NOT); break; } PrevState = CurrState; } - void onRegister(unsigned Reg) { + + bool onRegister(unsigned Reg, StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { default: @@ -525,6 +556,7 @@ private: break; case IES_PLUS: case IES_LPAREN: + case IES_LBRAC: State = IES_REGISTER; TmpReg = Reg; IC.pushOperand(IC_REGISTER); @@ -532,11 +564,16 @@ private: case IES_MULTIPLY: // Index Register - Scale * Register if (PrevState == IES_INTEGER) { - assert (!IndexReg && "IndexReg already set!"); + if (IndexReg) { + ErrMsg = "BaseReg/IndexReg already set!"; + return true; + } State = IES_REGISTER; IndexReg = Reg; // Get the scale and replace the 'Scale * Register' with '0'. Scale = IC.popOperand(); + if (checkScale(Scale, ErrMsg)) + return true; IC.pushOperand(IC_IMM); IC.popOperator(); } else { @@ -545,9 +582,20 @@ private: break; } PrevState = CurrState; + return false; } - void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { + bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName, + const InlineAsmIdentifierInfo &IDInfo, + bool ParsingInlineAsm, StringRef &ErrMsg) { + // InlineAsm: Treat an enum value as an integer + if (ParsingInlineAsm) + if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) + return onInteger(IDInfo.Enum.EnumVal, ErrMsg); + // Treat a symbolic constant like an integer + if (auto *CE = dyn_cast<MCConstantExpr>(SymRef)) + return onInteger(CE->getValue(), ErrMsg); PrevState = State; + bool HasSymbol = Sym != nullptr; switch (State) { default: State = IES_ERROR; @@ -555,12 +603,20 @@ private: case IES_PLUS: case IES_MINUS: case IES_NOT: + case IES_INIT: + case IES_LBRAC: + MemExpr = true; State = IES_INTEGER; Sym = SymRef; SymName = SymRefName; IC.pushOperand(IC_IMM); + if (ParsingInlineAsm) + Info = IDInfo; break; } + if (HasSymbol) + ErrMsg = "cannot use more than one symbol in memory operand"; + return HasSymbol; } bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { IntelExprState CurrState = State; @@ -580,16 +636,19 @@ private: case IES_MOD: case IES_MULTIPLY: case IES_LPAREN: + case IES_INIT: + case IES_LBRAC: State = IES_INTEGER; if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { // Index Register - Register * Scale - assert (!IndexReg && "IndexReg already set!"); + if (IndexReg) { + ErrMsg = "BaseReg/IndexReg already set!"; + return true; + } IndexReg = TmpReg; Scale = TmpInt; - if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { - ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; + if (checkScale(Scale, ErrMsg)) return true; - } // Get the scale and replace the 'Register * Scale' with '0'. IC.popOperator(); } else { @@ -640,19 +699,30 @@ private: break; } } - void onLBrac() { + bool onLBrac() { + if (BracCount) + return true; PrevState = State; switch (State) { default: State = IES_ERROR; break; case IES_RBRAC: + case IES_INTEGER: + case IES_RPAREN: State = IES_PLUS; IC.pushOperator(IC_PLUS); break; + case IES_INIT: + assert(!BracCount && "BracCount should be zero on parsing's start"); + State = IES_LBRAC; + break; } + MemExpr = true; + BracCount++; + return false; } - void onRBrac() { + bool onRBrac() { IntelExprState CurrState = State; switch (State) { default: @@ -661,6 +731,8 @@ private: case IES_INTEGER: case IES_REGISTER: case IES_RPAREN: + if (BracCount-- != 1) + return true; State = IES_RBRAC; if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with @@ -676,6 +748,7 @@ private: break; } PrevState = CurrState; + return false; } void onLParen() { IntelExprState CurrState = State; @@ -695,6 +768,8 @@ private: case IES_DIVIDE: case IES_MOD: case IES_LPAREN: + case IES_INIT: + case IES_LBRAC: State = IES_LPAREN; IC.pushOperator(IC_LPAREN); break; @@ -747,34 +822,41 @@ private: std::unique_ptr<X86Operand> ParseATTOperand(); std::unique_ptr<X86Operand> ParseIntelOperand(); std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); - bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); - unsigned IdentifyIntelOperator(StringRef Name); - unsigned ParseIntelOperator(unsigned OpKind); - std::unique_ptr<X86Operand> - ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); + bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); + unsigned IdentifyIntelInlineAsmOperator(StringRef Name); + unsigned ParseIntelInlineAsmOperator(unsigned OpKind); std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End); bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM); + void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, + SMLoc End); bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); - std::unique_ptr<X86Operand> - ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp, - bool isSymbol, unsigned Size); - bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, - InlineAsmIdentifierInfo &Info, - bool IsUnevaluatedOperand, SMLoc &End); + bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, SMLoc &End); std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + bool ParseIntelMemoryOperandSize(unsigned &Size); std::unique_ptr<X86Operand> CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, - InlineAsmIdentifierInfo &Info, - bool AllowBetterSizeMatch = false); + const InlineAsmIdentifierInfo &Info); bool parseDirectiveEven(SMLoc L); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); + /// CodeView FPO data directives. + bool parseDirectiveFPOProc(SMLoc L); + bool parseDirectiveFPOSetFrame(SMLoc L); + bool parseDirectiveFPOPushReg(SMLoc L); + bool parseDirectiveFPOStackAlloc(SMLoc L); + bool parseDirectiveFPOEndPrologue(SMLoc L); + bool parseDirectiveFPOEndProc(SMLoc L); + bool parseDirectiveFPOData(SMLoc L); + + bool validateInstruction(MCInst &Inst, const OperandVector &Ops); bool processInstruction(MCInst &Inst, const OperandVector &Ops); /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds @@ -828,7 +910,7 @@ private: MCSubtargetInfo &STI = copySTI(); FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); FeatureBitset OldMode = STI.getFeatureBits() & AllModes; - unsigned FB = ComputeAvailableFeatures( + uint64_t FB = ComputeAvailableFeatures( STI.ToggleFeature(OldMode.flip(mode))); setAvailableFeatures(FB); @@ -858,7 +940,7 @@ public: X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, const MCInstrInfo &mii, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr), + : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr), Code16GCC(false) { // Initialize the set of available features. @@ -885,8 +967,8 @@ static unsigned MatchRegisterName(StringRef Name); /// } -static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, - StringRef &ErrMsg) { +static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, + unsigned Scale, StringRef &ErrMsg) { // If we have both a base register and an index register make sure they are // both 64-bit or 32-bit registers. // To support VSIB, IndexReg can be 128-bit or 256-bit registers. @@ -925,7 +1007,7 @@ static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, } } } - return false; + return checkScale(Scale, ErrMsg); } bool X86AsmParser::ParseRegister(unsigned &RegNo, @@ -1016,19 +1098,31 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, EndLoc = Parser.getTok().getEndLoc(); - // If this is "db[0-7]", match it as an alias - // for dr[0-7]. - if (RegNo == 0 && Tok.getString().size() == 3 && - Tok.getString().startswith("db")) { - switch (Tok.getString()[2]) { - case '0': RegNo = X86::DR0; break; - case '1': RegNo = X86::DR1; break; - case '2': RegNo = X86::DR2; break; - case '3': RegNo = X86::DR3; break; - case '4': RegNo = X86::DR4; break; - case '5': RegNo = X86::DR5; break; - case '6': RegNo = X86::DR6; break; - case '7': RegNo = X86::DR7; break; + // If this is "db[0-15]", match it as an alias + // for dr[0-15]. + if (RegNo == 0 && Tok.getString().startswith("db")) { + if (Tok.getString().size() == 3) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + case '8': RegNo = X86::DR8; break; + case '9': RegNo = X86::DR9; break; + } + } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') { + switch (Tok.getString()[3]) { + case '0': RegNo = X86::DR10; break; + case '1': RegNo = X86::DR11; break; + case '2': RegNo = X86::DR12; break; + case '3': RegNo = X86::DR13; break; + case '4': RegNo = X86::DR14; break; + case '5': RegNo = X86::DR15; break; + } } if (RegNo != 0) { @@ -1198,124 +1292,48 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { return ParseATTOperand(); } -/// getIntelMemOperandSize - Return intel memory operand size. -static unsigned getIntelMemOperandSize(StringRef OpStr) { - unsigned Size = StringSwitch<unsigned>(OpStr) - .Cases("BYTE", "byte", 8) - .Cases("WORD", "word", 16) - .Cases("DWORD", "dword", 32) - .Cases("FWORD", "fword", 48) - .Cases("QWORD", "qword", 64) - .Cases("MMWORD","mmword", 64) - .Cases("XWORD", "xword", 80) - .Cases("TBYTE", "tbyte", 80) - .Cases("XMMWORD", "xmmword", 128) - .Cases("YMMWORD", "ymmword", 256) - .Cases("ZMMWORD", "zmmword", 512) - .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter - .Default(0); - return Size; -} - std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, - InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) { + const InlineAsmIdentifierInfo &Info) { // If we found a decl other than a VarDecl, then assume it is a FuncDecl or // some other label reference. - if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) { + if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) { // Insert an explicit size if the user didn't have one. if (!Size) { Size = getPointerWidth(); InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, /*Len=*/0, Size); } - // Create an absolute memory reference in order to match against // instructions taking a PC relative operand. return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, - Identifier, Info.OpDecl); + Identifier, Info.Label.Decl); } - - // We either have a direct symbol reference, or an offset from a symbol. The // parser always puts the symbol on the LHS, so look there for size // calculation purposes. unsigned FrontendSize = 0; - const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); - bool IsSymRef = - isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); - if (IsSymRef && !Size && Info.Type) - FrontendSize = Info.Type * 8; // Size is in terms of bits in this context. - - // When parsing inline assembly we set the base register to a non-zero value + void *Decl = nullptr; + bool IsGlobalLV = false; + if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { + // Size is in terms of bits in this context. + FrontendSize = Info.Var.Type * 8; + Decl = Info.Var.Decl; + IsGlobalLV = Info.Var.IsGlobalLV; + } + // It is widely common for MS InlineAsm to use a global variable and one/two + // registers in a mmory expression, and though unaccessible via rip/eip. + if (IsGlobalLV && (BaseReg || IndexReg)) { + return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End); + // Otherwise, we set the base register to a non-zero value // if we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. - BaseReg = BaseReg ? BaseReg : 1; - return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, - IndexReg, Scale, Start, End, Size, Identifier, - Info.OpDecl, FrontendSize); -} - -static void -RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites, - StringRef SymName, int64_t ImmDisp, - int64_t FinalImmDisp, SMLoc &BracLoc, - SMLoc &StartInBrac, SMLoc &End) { - // Remove the '[' and ']' from the IR string. - AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); - AsmRewrites.emplace_back(AOK_Skip, End, 1); - - // If ImmDisp is non-zero, then we parsed a displacement before the - // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) - // If ImmDisp doesn't match the displacement computed by the state machine - // then we have an additional displacement in the bracketed expression. - if (ImmDisp != FinalImmDisp) { - if (ImmDisp) { - // We have an immediate displacement before the bracketed expression. - // Adjust this to match the final immediate displacement. - bool Found = false; - for (AsmRewrite &AR : AsmRewrites) { - if (AR.Loc.getPointer() > BracLoc.getPointer()) - continue; - if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { - assert (!Found && "ImmDisp already rewritten."); - AR.Kind = AOK_Imm; - AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); - AR.Val = FinalImmDisp; - Found = true; - break; - } - } - assert (Found && "Unable to rewrite ImmDisp."); - (void)Found; - } else { - // We have a symbolic and an immediate displacement, but no displacement - // before the bracketed expression. Put the immediate displacement - // before the bracketed expression. - AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); - } - } - // Remove all the ImmPrefix rewrites within the brackets. - // We may have some Imm rewrties as a result of an operator applying, - // remove them as well - for (AsmRewrite &AR : AsmRewrites) { - if (AR.Loc.getPointer() < StartInBrac.getPointer()) - continue; - if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) - AR.Kind = AOK_Delete; - } - const char *SymLocPtr = SymName.data(); - // Skip everything before the symbol. - if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { - assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); - } - // Skip everything after the symbol. - if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { - SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); - assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites.emplace_back(AOK_Skip, Loc, Len); + } else { + BaseReg = BaseReg ? BaseReg : 1; + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, + IndexReg, Scale, Start, End, Size, Identifier, + Decl, FrontendSize); } } @@ -1348,77 +1366,80 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); + StringRef ErrMsg; AsmToken::TokenKind PrevTK = AsmToken::Error; bool Done = false; while (!Done) { bool UpdateLocLex = true; - AsmToken::TokenKind TK = getLexer().getKind(); - // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an - // identifier. Don't try an parse it as a register. - if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") && - TK != AsmToken::Identifier) - break; - - // If we're parsing an immediate expression, we don't expect a '['. - if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) - break; switch (TK) { - default: { - if (SM.isValidEndState()) { - Done = true; + default: + if ((Done = SM.isValidEndState())) break; - } return Error(Tok.getLoc(), "unknown token in expression"); - } - case AsmToken::EndOfStatement: { + case AsmToken::EndOfStatement: Done = true; break; - } + case AsmToken::Real: + // DotOperator: [ebx].0 + UpdateLocLex = false; + if (ParseIntelDotOperator(SM, End)) + return true; + break; case AsmToken::String: case AsmToken::Identifier: { - // This could be a register or a symbolic displacement. - unsigned TmpReg; - const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); UpdateLocLex = false; - if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { - SM.onRegister(TmpReg); - } else if (ParseIntelNamedOperator(Identifier, SM)) { - UpdateLocLex = true; - } else if (!isParsingInlineAsm()) { - if (getParser().parsePrimaryExpr(Val, End)) + // Register + unsigned Reg; + if (Tok.isNot(AsmToken::String) && !ParseRegister(Reg, IdentLoc, End)) { + if (SM.onRegister(Reg, ErrMsg)) + return Error(Tok.getLoc(), ErrMsg); + break; + } + // Operator synonymous ("not", "or" etc.) + if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM))) + break; + // Symbol reference, when parsing assembly content + InlineAsmIdentifierInfo Info; + const MCExpr *Val; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) { return Error(Tok.getLoc(), "Unexpected identifier!"); - SM.onIdentifierExpr(Val, Identifier); - } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) { - if (OpKind == IOK_OFFSET) + } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { + return Error(IdentLoc, ErrMsg); + } else + break; + } + // MS InlineAsm operators (TYPE/LENGTH/SIZE) + if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { + if (OpKind == IOK_OFFSET) return Error(IdentLoc, "Dealing OFFSET operator as part of" "a compound immediate expression is yet to be supported"); - int64_t Val = ParseIntelOperator(OpKind); - if (!Val) + if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { + if (SM.onInteger(Val, ErrMsg)) + return Error(IdentLoc, ErrMsg); + } else return true; - StringRef ErrMsg; - if (SM.onInteger(Val, ErrMsg)) - return Error(IdentLoc, ErrMsg); - } else if (Identifier.find('.') != StringRef::npos && - PrevTK == AsmToken::RBrac) { - return false; - } else { - InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); - if (ParseIntelIdentifier(Val, Identifier, Info, - /*Unevaluated=*/false, End)) + break; + } + // MS Dot Operator expression + if (Identifier.count('.') && PrevTK == AsmToken::RBrac) { + if (ParseIntelDotOperator(SM, End)) return true; - SM.onIdentifierExpr(Val, Identifier); + break; } + // MS InlineAsm identifier + if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) + return true; + else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg)) + return Error(IdentLoc, ErrMsg); break; } case AsmToken::Integer: { - StringRef ErrMsg; - if (isParsingInlineAsm() && SM.getAddImmPrefix()) - InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); // Look for 'b' or 'f' following an Integer as a directional label SMLoc Loc = getTok().getLoc(); int64_t IntVal = getTok().getIntVal(); @@ -1435,7 +1456,10 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); StringRef Identifier = Sym->getName(); - SM.onIdentifierExpr(Val, Identifier); + InlineAsmIdentifierInfo Info; + if (SM.onIdentifierExpr(Val, Identifier, Info, + isParsingInlineAsm(), ErrMsg)) + return Error(Loc, ErrMsg); End = consumeToken(); } else { if (SM.onInteger(IntVal, ErrMsg)) @@ -1447,11 +1471,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { } break; } - case AsmToken::Plus: SM.onPlus(); break; - case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Plus: + if (SM.onPlus(ErrMsg)) + return Error(getTok().getLoc(), ErrMsg); + break; + case AsmToken::Minus: + if (SM.onMinus(ErrMsg)) + return Error(getTok().getLoc(), ErrMsg); + break; case AsmToken::Tilde: SM.onNot(); break; case AsmToken::Star: SM.onStar(); break; case AsmToken::Slash: SM.onDivide(); break; + case AsmToken::Percent: SM.onMod(); break; case AsmToken::Pipe: SM.onOr(); break; case AsmToken::Caret: SM.onXor(); break; case AsmToken::Amp: SM.onAnd(); break; @@ -1459,8 +1490,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { SM.onLShift(); break; case AsmToken::GreaterGreater: SM.onRShift(); break; - case AsmToken::LBrac: SM.onLBrac(); break; - case AsmToken::RBrac: SM.onRBrac(); break; + case AsmToken::LBrac: + if (SM.onLBrac()) + return Error(Tok.getLoc(), "unexpected bracket encountered"); + break; + case AsmToken::RBrac: + if (SM.onRBrac()) + return Error(Tok.getLoc(), "unexpected bracket encountered"); + break; case AsmToken::LParen: SM.onLParen(); break; case AsmToken::RParen: SM.onRParen(); break; } @@ -1475,112 +1512,49 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return false; } -std::unique_ptr<X86Operand> -X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - int64_t ImmDisp, bool isSymbol, - unsigned Size) { - MCAsmParser &Parser = getParser(); - const AsmToken &Tok = Parser.getTok(); - SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(BracLoc, "Expected '[' token!"); - Parser.Lex(); // Eat '[' - - SMLoc StartInBrac = Parser.getTok().getLoc(); - // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We - // may have already parsed an immediate displacement before the bracketed - // expression. - IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); - if (ParseIntelExpression(SM, End)) - return nullptr; - - const MCExpr *Disp = nullptr; - if (const MCExpr *Sym = SM.getSym()) { - // A symbolic displacement. - Disp = Sym; - if (isParsingInlineAsm()) - RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), - ImmDisp, SM.getImm(), BracLoc, StartInBrac, - End); - } - - if (SM.getImm() || !Disp) { - const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); - if (Disp) - Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); - else - Disp = Imm; // An immediate displacement only. - } - - // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC - // will in fact do global lookup the field name inside all global typedefs, - // but we don't emulate that. - if ((Parser.getTok().getKind() == AsmToken::Identifier || - Parser.getTok().getKind() == AsmToken::Dot || - Parser.getTok().getKind() == AsmToken::Real) && - Parser.getTok().getString().find('.') != StringRef::npos) { - const MCExpr *NewDisp; - if (ParseIntelDotOperator(Disp, NewDisp)) - return nullptr; - - End = Tok.getEndLoc(); - Parser.Lex(); // Eat the field. - Disp = NewDisp; - } - - if (isSymbol) { - if (SM.getSym()) { - Error(Start, "cannot use more than one symbol in memory operand"); - return nullptr; +void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM, + SMLoc Start, SMLoc End) { + SMLoc Loc = Start; + unsigned ExprLen = End.getPointer() - Start.getPointer(); + // Skip everything before a symbol displacement (if we have one) + if (SM.getSym()) { + StringRef SymName = SM.getSymName(); + if (unsigned Len = SymName.data() - Start.getPointer()) + InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len); + Loc = SMLoc::getFromPointer(SymName.data() + SymName.size()); + ExprLen = End.getPointer() - (SymName.data() + SymName.size()); + // If we have only a symbol than there's no need for complex rewrite, + // simply skip everything after it + if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) { + if (ExprLen) + InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen); + return; } - if (SM.getBaseReg()) { - Error(Start, "cannot use base register with variable reference"); - return nullptr; - } - if (SM.getIndexReg()) { - Error(Start, "cannot use index register with variable reference"); - return nullptr; - } - } - - int BaseReg = SM.getBaseReg(); - int IndexReg = SM.getIndexReg(); - int Scale = SM.getScale(); - if (!isParsingInlineAsm()) { - // handle [-42] - if (!BaseReg && !IndexReg) { - if (!SegReg) - return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); - return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, - Start, End, Size); - } - StringRef ErrMsg; - if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { - Error(StartInBrac, ErrMsg); - return nullptr; - } - return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, - IndexReg, Scale, Start, End, Size); } - - InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); - return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, - End, Size, SM.getSymName(), Info, - isParsingInlineAsm()); + // Build an Intel Expression rewrite + StringRef BaseRegStr; + StringRef IndexRegStr; + if (SM.getBaseReg()) + BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg()); + if (SM.getIndexReg()) + IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg()); + // Emit it + IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr()); + InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr); } // Inline assembly may use variable names with namespace alias qualifiers. -bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, - StringRef &Identifier, - InlineAsmIdentifierInfo &Info, - bool IsUnevaluatedOperand, SMLoc &End) { +bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, + SMLoc &End) { MCAsmParser &Parser = getParser(); assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); Val = nullptr; StringRef LineBuf(Identifier.data()); - void *Result = - SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); + SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); const AsmToken &Tok = Parser.getTok(); SMLoc Loc = Tok.getLoc(); @@ -1596,12 +1570,13 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // The frontend should end parsing on an assembler token boundary, unless it // failed parsing. - assert((End.getPointer() == EndPtr || !Result) && - "frontend claimed part of a token?"); + assert((End.getPointer() == EndPtr || + Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) && + "frontend claimed part of a token?"); // If the identifier lookup was unsuccessful, assume that we are dealing with // a label. - if (!Result) { + if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) { StringRef InternalName = SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), Loc, false); @@ -1609,8 +1584,8 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // Push a rewrite for replacing the identifier name with the internal name. InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), InternalName); - } - + } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) + return false; // Create the symbol reference. MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; @@ -1618,57 +1593,6 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, return false; } -/// \brief Parse intel style segment override. -std::unique_ptr<X86Operand> -X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, - unsigned Size) { - MCAsmParser &Parser = getParser(); - assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); - const AsmToken &Tok = Parser.getTok(); // Eat colon. - if (Tok.isNot(AsmToken::Colon)) - return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); - Parser.Lex(); // Eat ':' - - int64_t ImmDisp = 0; - if (getLexer().is(AsmToken::Integer)) { - ImmDisp = Tok.getIntVal(); - AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. - - if (isParsingInlineAsm()) - InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); - - if (getLexer().isNot(AsmToken::LBrac)) { - // An immediate following a 'segment register', 'colon' token sequence can - // be followed by a bracketed expression. If it isn't we know we have our - // final segment override. - const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); - return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, - /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, - Start, ImmDispToken.getEndLoc(), Size); - } - } - - if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size); - - const MCExpr *Val; - SMLoc End; - if (!isParsingInlineAsm()) { - if (getParser().parsePrimaryExpr(Val, End)) - return ErrorOperand(Tok.getLoc(), "unknown token in expression"); - - return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); - } - - InlineAsmIdentifierInfo Info; - StringRef Identifier = Tok.getString(); - if (ParseIntelIdentifier(Val, Identifier, Info, - /*Unevaluated=*/false, End)) - return nullptr; - return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, - /*Scale=*/1, Start, End, Size, Identifier, Info); -} - //ParseRoundingModeOp - Parse AVX-512 rounding mode operand std::unique_ptr<X86Operand> X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { @@ -1708,17 +1632,9 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { } /// Parse the '.' operator. -bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, - const MCExpr *&NewDisp) { - MCAsmParser &Parser = getParser(); - const AsmToken &Tok = Parser.getTok(); - int64_t OrigDispVal, DotDispVal; - - // FIXME: Handle non-constant expressions. - if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) - OrigDispVal = OrigDisp->getValue(); - else - return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); +bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) { + const AsmToken &Tok = getTok(); + unsigned Offset; // Drop the optional '.'. StringRef DotDispStr = Tok.getString(); @@ -1729,24 +1645,21 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, if (Tok.is(AsmToken::Real)) { APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); - DotDispVal = DotDisp.getZExtValue(); + Offset = DotDisp.getZExtValue(); } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { - unsigned DotDisp; std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - DotDisp)) + Offset)) return Error(Tok.getLoc(), "Unable to lookup field reference!"); - DotDispVal = DotDisp; } else return Error(Tok.getLoc(), "Unexpected token type!"); - if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { - SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); - unsigned Len = DotDispStr.size(); - InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, DotDispVal); - } - - NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); + // Eat the DotExpression and update End + End = SMLoc::getFromPointer(DotDispStr.data()); + const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); + while (Tok.getLoc().getPointer() < DotExprEndLoc) + Lex(); + SM.addImm(Offset); return false; } @@ -1762,10 +1675,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { InlineAsmIdentifierInfo Info; SMLoc Start = Tok.getLoc(), End; StringRef Identifier = Tok.getString(); - if (ParseIntelIdentifier(Val, Identifier, Info, - /*Unevaluated=*/false, End)) + if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) return nullptr; + void *Decl = nullptr; + // FIXME: MS evaluates "offset <Constant>" to the underlying integral + if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) + return ErrorOperand(Start, "offset operator cannot yet handle constants"); + else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) + Decl = Info.Var.Decl; // Don't emit the offset operator. InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); @@ -1776,12 +1695,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX); return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc, Identifier, Info.OpDecl); + OffsetOfLoc, Identifier, Decl); } // Query a candidate string for being an Intel assembly operator // Report back its kind, or IOK_INVALID if does not evaluated as a known one -unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) { +unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) { return StringSwitch<unsigned>(Name) .Cases("TYPE","type",IOK_TYPE) .Cases("SIZE","size",IOK_SIZE) @@ -1796,41 +1715,62 @@ unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. -unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) { +unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - SMLoc TypeLoc = Tok.getLoc(); Parser.Lex(); // Eat operator. const MCExpr *Val = nullptr; InlineAsmIdentifierInfo Info; SMLoc Start = Tok.getLoc(), End; StringRef Identifier = Tok.getString(); - if (ParseIntelIdentifier(Val, Identifier, Info, - /*Unevaluated=*/true, End)) + if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, + /*Unevaluated=*/true, End)) return 0; - if (!Info.OpDecl) { + if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { Error(Start, "unable to lookup expression"); return 0; } - + unsigned CVal = 0; switch(OpKind) { default: llvm_unreachable("Unexpected operand kind!"); - case IOK_LENGTH: CVal = Info.Length; break; - case IOK_SIZE: CVal = Info.Size; break; - case IOK_TYPE: CVal = Info.Type; break; + case IOK_LENGTH: CVal = Info.Var.Length; break; + case IOK_SIZE: CVal = Info.Var.Size; break; + case IOK_TYPE: CVal = Info.Var.Type; break; } - // Rewrite the type operator and the C or C++ type or variable in terms of an - // immediate. E.g. TYPE foo -> $$4 - unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); - return CVal; } +bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { + Size = StringSwitch<unsigned>(getTok().getString()) + .Cases("BYTE", "byte", 8) + .Cases("WORD", "word", 16) + .Cases("DWORD", "dword", 32) + .Cases("FLOAT", "float", 32) + .Cases("LONG", "long", 32) + .Cases("FWORD", "fword", 48) + .Cases("DOUBLE", "double", 64) + .Cases("QWORD", "qword", 64) + .Cases("MMWORD","mmword", 64) + .Cases("XWORD", "xword", 80) + .Cases("TBYTE", "tbyte", 80) + .Cases("XMMWORD", "xmmword", 128) + .Cases("YMMWORD", "ymmword", 256) + .Cases("ZMMWORD", "zmmword", 512) + .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter + .Default(0); + if (Size) { + const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word). + if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr"))) + return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); + Lex(); // Eat ptr. + } + return false; +} + std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -1840,100 +1780,76 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { // Should be handled as part of immediate expression, as other operators // Currently, only supported as a stand-alone operand if (isParsingInlineAsm()) - if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET) + if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET) return ParseIntelOffsetOfOperator(); - bool PtrInOperand = false; - unsigned Size = getIntelMemOperandSize(Tok.getString()); - if (Size) { - Parser.Lex(); // Eat operand size (e.g., byte, word). - if (Tok.getString() != "PTR" && Tok.getString() != "ptr") - return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); - Parser.Lex(); // Eat ptr. - PtrInOperand = true; - } + // Parse optional Size directive. + unsigned Size; + if (ParseIntelMemoryOperandSize(Size)) + return nullptr; + bool PtrInOperand = bool(Size); Start = Tok.getLoc(); - // rounding mode token + // Rounding mode operand. if (getSTI().getFeatureBits()[X86::FeatureAVX512] && getLexer().is(AsmToken::LCurly)) return ParseRoundingModeOp(Start, End); - // Register. + // Register operand. unsigned RegNo = 0; - if (getLexer().is(AsmToken::Identifier) && - !ParseRegister(RegNo, Start, End)) { - // If this is a segment register followed by a ':', then this is the start - // of a segment override, otherwise this is a normal register reference. - // In case it is a normal register and there is ptr in the operand this - // is an error + if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) { if (RegNo == X86::RIP) return ErrorOperand(Start, "rip can only be used as a base register"); - if (getLexer().isNot(AsmToken::Colon)) { - if (PtrInOperand) { - return ErrorOperand(Start, "expected memory operand after " - "'ptr', found register operand instead"); - } - return X86Operand::CreateReg(RegNo, Start, End); - } - return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); + // A Register followed by ':' is considered a segment override + if (Tok.isNot(AsmToken::Colon)) + return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) : + ErrorOperand(Start, "expected memory operand after 'ptr', " + "found register operand instead"); + // An alleged segment override. check if we have a valid segment register + if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo)) + return ErrorOperand(Start, "invalid segment register"); + // Eat ':' and update Start location + Start = Lex().getLoc(); } // Immediates and Memory - - // Parse [ BaseReg + Scale*IndexReg + Disp ]. - if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false, - Size); - - AsmToken StartTok = Tok; - IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, - /*AddImmPrefix=*/false); + IntelExprStateMachine SM; if (ParseIntelExpression(SM, End)) return nullptr; - bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant; - int64_t Imm = SM.getImm(); - if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant) - SM.getSym()->evaluateAsAbsolute(Imm); - - if (StartTok.isNot(AsmToken::Identifier) && - StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) { - unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); - if (StartTok.getString().size() == Len) - // Just add a prefix if this wasn't a complex immediate expression. - InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); - else - // Otherwise, rewrite the complex expression as a single immediate. - InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); - } + if (isParsingInlineAsm()) + RewriteIntelExpression(SM, Start, Tok.getLoc()); - if (getLexer().isNot(AsmToken::LBrac)) { - // If a directional label (ie. 1f or 2b) was parsed above from - // ParseIntelExpression() then SM.getSym() was set to a pointer to - // to the MCExpr with the directional local symbol and this is a - // memory operand not an immediate operand. - if (isSymbol) { - if (isParsingInlineAsm()) - return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0, - /*IndexReg=*/0, - /*Scale=*/1, Start, End, Size, - SM.getSymName(), SM.getIdentifierInfo()); - return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, - Size); - } + int64_t Imm = SM.getImm(); + const MCExpr *Disp = SM.getSym(); + const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext()); + if (Disp && Imm) + Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext()); + if (!Disp) + Disp = ImmDisp; - const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); - return X86Operand::CreateImm(ImmExpr, Start, End); - } + // RegNo != 0 specifies a valid segment register, + // and we are parsing a segment override + if (!SM.isMemExpr() && !RegNo) + return X86Operand::CreateImm(Disp, Start, End); - // Only positive immediates are valid. - if (Imm < 0) - return ErrorOperand(Start, "expected a positive immediate displacement " - "before bracketed expr."); + StringRef ErrMsg; + unsigned BaseReg = SM.getBaseReg(); + unsigned IndexReg = SM.getIndexReg(); + unsigned Scale = SM.getScale(); - return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size); + if ((BaseReg || IndexReg) && + CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, ErrMsg)) + return ErrorOperand(Start, ErrMsg); + if (isParsingInlineAsm()) + return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg, + Scale, Start, End, Size, SM.getSymName(), + SM.getIdentifierInfo()); + if (!(BaseReg || IndexReg || RegNo)) + return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); + return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, + BaseReg, IndexReg, Scale, Start, End, Size); } std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { @@ -2055,14 +1971,20 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, // no errors. // Query for the need of further parsing for a {%k<NUM>} mark if (!Z || getLexer().is(AsmToken::LCurly)) { - const SMLoc StartLoc = Z ? consumeToken() : consumedToken; + SMLoc StartLoc = Z ? consumeToken() : consumedToken; // Parse an op-mask register mark ({%k<NUM>}), which is now to be // expected - if (std::unique_ptr<X86Operand> Op = ParseOperand()) { + unsigned RegNo; + SMLoc RegLoc; + if (!ParseRegister(RegNo, RegLoc, StartLoc) && + X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) { + if (RegNo == X86::K0) + return Error(RegLoc, "Register k0 can't be used as write mask"); if (!getLexer().is(AsmToken::RCurly)) return Error(getLexer().getLoc(), "Expected } at this point"); Operands.push_back(X86Operand::CreateToken("{", StartLoc)); - Operands.push_back(std::move(Op)); + Operands.push_back( + X86Operand::CreateReg(RegNo, StartLoc, StartLoc)); Operands.push_back(X86Operand::CreateToken("}", consumeToken())); } else return Error(getLexer().getLoc(), @@ -2072,7 +1994,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, // Have we've found a parsing error, or found no (expected) {z} mark // - report an error if (ParseZ(Z, consumeToken()) || !Z) - return true; + return Error(getLexer().getLoc(), + "Expected a {z} mark at this point"); } // '{z}' on its own is meaningless, hence should be ignored. @@ -2125,9 +2048,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, // memory operand consumed. } else { SMLoc ExprEnd; + getLexer().UnLex(AsmToken(AsmToken::LParen, "(")); - // It must be an parenthesized expression, parse it now. - if (getParser().parseParenExpression(Disp, ExprEnd)) + // It must be either an parenthesized expression, or an expression that + // begins from a parenthesized expression, parse it now. Example: (1+2) or + // (1+2)+3 + if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; // After parsing the base expression we could either have a parenthesized @@ -2258,7 +2184,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, } StringRef ErrMsg; - if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { + if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, ErrMsg)) { Error(BaseLoc, ErrMsg); return nullptr; } @@ -2275,7 +2201,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, InstInfo = &Info; StringRef PatchedName = Name; - if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) { + if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) && + isParsingIntelSyntax() && isParsingInlineAsm()) { StringRef NextTok = Parser.getTok().getString(); if (NextTok == "short") { SMLoc NameEndLoc = @@ -2417,22 +2344,57 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); // Determine whether this is an instruction prefix. - bool isPrefix = - Name == "lock" || Name == "rep" || - Name == "repe" || Name == "repz" || - Name == "repne" || Name == "repnz" || - Name == "rex64" || Name == "data16" || Name == "data32"; + // FIXME: + // Enhance prefixes integrity robustness. for example, following forms + // are currently tolerated: + // repz repnz <insn> ; GAS errors for the use of two similar prefixes + // lock addq %rax, %rbx ; Destination operand must be of memory type + // xacquire <insn> ; xacquire must be accompanied by 'lock' + bool isPrefix = StringSwitch<bool>(Name) + .Cases("rex64", "data32", "data16", true) + .Cases("xacquire", "xrelease", true) + .Cases("acquire", "release", isParsingIntelSyntax()) + .Default(false); + + auto isLockRepeatPrefix = [](StringRef N) { + return StringSwitch<bool>(N) + .Cases("lock", "rep", "repe", "repz", "repne", "repnz", true) + .Default(false); + }; bool CurlyAsEndOfStatement = false; + + unsigned Flags = X86::IP_NO_PREFIX; + while (isLockRepeatPrefix(Name.lower())) { + unsigned Prefix = + StringSwitch<unsigned>(Name) + .Cases("lock", "lock", X86::IP_HAS_LOCK) + .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT) + .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) + .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) + Flags |= Prefix; + Name = Parser.getTok().getString(); + Parser.Lex(); // eat the prefix + // Hack: we could have something like + // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" + while (Name.startswith(";") || Name.startswith("\n") || + Name.startswith("\t") || Name.startswith("/")) { + Name = Parser.getTok().getString(); + Parser.Lex(); // go to next prefix or instr + } + } + + if (Flags) + PatchedName = Name; + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as // the next one. if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { - // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) Operands.push_back(X86Operand::CreateToken("*", consumeToken())); @@ -2670,6 +2632,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + if (Flags) + Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc)); return false; } @@ -2677,12 +2641,79 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { return false; } +bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + + switch (Inst.getOpcode()) { + case X86::VGATHERDPDYrm: + case X86::VGATHERDPDrm: + case X86::VGATHERDPSYrm: + case X86::VGATHERDPSrm: + case X86::VGATHERQPDYrm: + case X86::VGATHERQPDrm: + case X86::VGATHERQPSYrm: + case X86::VGATHERQPSrm: + case X86::VPGATHERDDYrm: + case X86::VPGATHERDDrm: + case X86::VPGATHERDQYrm: + case X86::VPGATHERDQrm: + case X86::VPGATHERQDYrm: + case X86::VPGATHERQDrm: + case X86::VPGATHERQQYrm: + case X86::VPGATHERQQrm: { + unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + unsigned Index = + MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg()); + if (Dest == Mask || Dest == Index || Mask == Index) + return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " + "registers should be distinct"); + break; + } + case X86::VGATHERDPDZ128rm: + case X86::VGATHERDPDZ256rm: + case X86::VGATHERDPDZrm: + case X86::VGATHERDPSZ128rm: + case X86::VGATHERDPSZ256rm: + case X86::VGATHERDPSZrm: + case X86::VGATHERQPDZ128rm: + case X86::VGATHERQPDZ256rm: + case X86::VGATHERQPDZrm: + case X86::VGATHERQPSZ128rm: + case X86::VGATHERQPSZ256rm: + case X86::VGATHERQPSZrm: + case X86::VPGATHERDDZ128rm: + case X86::VPGATHERDDZ256rm: + case X86::VPGATHERDDZrm: + case X86::VPGATHERDQZ128rm: + case X86::VPGATHERDQZ256rm: + case X86::VPGATHERDQZrm: + case X86::VPGATHERQDZ128rm: + case X86::VPGATHERQDZ256rm: + case X86::VPGATHERQDZrm: + case X86::VPGATHERQQZ128rm: + case X86::VPGATHERQQZ256rm: + case X86::VPGATHERQQZrm: { + unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Index = + MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg()); + if (Dest == Index) + return Warning(Ops[0]->getStartLoc(), "index and destination registers " + "should be distinct"); + break; + } + } + + return false; +} + static const char *getSubtargetFeatureName(uint64_t Val); void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out) { - Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(), - MII, Out); + Instrumentation->InstrumentAndEmitInstruction( + Inst, Operands, getContext(), MII, Out, + getParser().shouldPrintSchedInfo()); } bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -2737,6 +2768,16 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); } +static unsigned getPrefixes(OperandVector &Operands) { + unsigned Result = 0; + X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back()); + if (Prefix.isPrefix()) { + Result = Prefix.getPrefix(); + Operands.pop_back(); + } + return Result; +} + bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2751,13 +2792,20 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); bool WasOriginallyInvalidOperand = false; + unsigned Prefixes = getPrefixes(Operands); + MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // First, try a direct match. switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())) { default: llvm_unreachable("Unexpected match result!"); case Match_Success: + if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) + return true; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. @@ -2917,12 +2965,16 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, StringRef Mnemonic = Op.getToken(); SMRange EmptyRange = None; StringRef Base = Op.getToken(); + unsigned Prefixes = getPrefixes(Operands); // First, handle aliases that expand to multiple instructions. MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // Find one unsized memory operand, if present. X86Operand *UnsizedMemOp = nullptr; for (const auto &Op : Operands) { @@ -3043,6 +3095,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // instruction will already have been filled in correctly, since the failing // matches won't have modified it). if (NumSuccessfulMatches == 1) { + if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) + return true; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the individual // transformations can chain off each other. @@ -3121,6 +3175,19 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal == ".even") return parseDirectiveEven(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_proc") + return parseDirectiveFPOProc(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_setframe") + return parseDirectiveFPOSetFrame(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_pushreg") + return parseDirectiveFPOPushReg(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_stackalloc") + return parseDirectiveFPOStackAlloc(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_endprologue") + return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); + else if (IDVal == ".cv_fpo_endproc") + return parseDirectiveFPOEndProc(DirectiveID.getLoc()); + return true; } @@ -3218,6 +3285,71 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { return false; } +// .cv_fpo_proc foo +bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) { + MCAsmParser &Parser = getParser(); + StringRef ProcName; + int64_t ParamsSize; + if (Parser.parseIdentifier(ProcName)) + return Parser.TokError("expected symbol name"); + if (Parser.parseIntToken(ParamsSize, "expected parameter byte count")) + return true; + if (!isUIntN(32, ParamsSize)) + return Parser.TokError("parameters size out of range"); + if (Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_proc' directive"); + MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); + return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L); +} + +// .cv_fpo_setframe ebp +bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) { + MCAsmParser &Parser = getParser(); + unsigned Reg; + SMLoc DummyLoc; + if (ParseRegister(Reg, DummyLoc, DummyLoc) || + Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_setframe' directive"); + return getTargetStreamer().emitFPOSetFrame(Reg, L); +} + +// .cv_fpo_pushreg ebx +bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) { + MCAsmParser &Parser = getParser(); + unsigned Reg; + SMLoc DummyLoc; + if (ParseRegister(Reg, DummyLoc, DummyLoc) || + Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_pushreg' directive"); + return getTargetStreamer().emitFPOPushReg(Reg, L); +} + +// .cv_fpo_stackalloc 20 +bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) { + MCAsmParser &Parser = getParser(); + int64_t Offset; + if (Parser.parseIntToken(Offset, "expected offset") || + Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_stackalloc' directive"); + return getTargetStreamer().emitFPOStackAlloc(Offset, L); +} + +// .cv_fpo_endprologue +bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_endprologue' directive"); + return getTargetStreamer().emitFPOEndPrologue(L); +} + +// .cv_fpo_endproc +bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_endproc' directive"); + return getTargetStreamer().emitFPOEndProc(L); +} + // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); |
