diff options
author | Ed Schouten <ed@FreeBSD.org> | 2009-07-04 13:58:26 +0000 |
---|---|---|
committer | Ed Schouten <ed@FreeBSD.org> | 2009-07-04 13:58:26 +0000 |
commit | 18f153bdb9db52e7089a2d5293b96c45a3124a26 (patch) | |
tree | 84360c8989c912127a383af37c4b1aa5767bd16e /tools/llvm-mc | |
parent | f859468f5a21b6952ab62917777f9fb3bba57003 (diff) |
Diffstat (limited to 'tools/llvm-mc')
-rw-r--r-- | tools/llvm-mc/AsmExpr.cpp | 162 | ||||
-rw-r--r-- | tools/llvm-mc/AsmExpr.h | 179 | ||||
-rw-r--r-- | tools/llvm-mc/AsmLexer.cpp | 60 | ||||
-rw-r--r-- | tools/llvm-mc/AsmLexer.h | 12 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.cpp | 419 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.h | 63 | ||||
-rw-r--r-- | tools/llvm-mc/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tools/llvm-mc/MC-X86Specific.cpp | 176 | ||||
-rw-r--r-- | tools/llvm-mc/llvm-mc.cpp | 41 |
9 files changed, 945 insertions, 168 deletions
diff --git a/tools/llvm-mc/AsmExpr.cpp b/tools/llvm-mc/AsmExpr.cpp new file mode 100644 index 000000000000..c3362e4268c8 --- /dev/null +++ b/tools/llvm-mc/AsmExpr.cpp @@ -0,0 +1,162 @@ +//===- AsmExpr.cpp - Assembly file expressions ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AsmExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +using namespace llvm; + +AsmExpr::~AsmExpr() { +} + +bool AsmExpr::EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const { + MCValue Value; + + if (!EvaluateAsRelocatable(Ctx, Value) || !Value.isAbsolute()) + return false; + + Res = Value.getConstant(); + return true; +} + +static bool EvaluateSymbolicAdd(const MCValue &LHS, MCSymbol *RHS_A, + MCSymbol *RHS_B, int64_t RHS_Cst, + MCValue &Res) { + // We can't add or subtract two symbols. + if ((LHS.getSymA() && RHS_A) || + (LHS.getSymB() && RHS_B)) + return false; + + MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; + MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; + if (B) { + // If we have a negated symbol, then we must have also have a non-negated + // symbol in order to encode the expression. We can do this check later to + // permit expressions which eventually fold to a representable form -- such + // as (a + (0 - b)) -- if necessary. + if (!A) + return false; + } + Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst); + return true; +} + +bool AsmExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const { + switch (getKind()) { + default: + assert(0 && "Invalid assembly expression kind!"); + + case Constant: + Res = MCValue::get(cast<AsmConstantExpr>(this)->getValue()); + return true; + + case SymbolRef: { + MCSymbol *Sym = cast<AsmSymbolRefExpr>(this)->getSymbol(); + if (const MCValue *Value = Ctx.GetSymbolValue(Sym)) + Res = *Value; + else + Res = MCValue::get(Sym, 0, 0); + return true; + } + + case Unary: { + const AsmUnaryExpr *AUE = cast<AsmUnaryExpr>(this); + MCValue Value; + + if (!AUE->getSubExpr()->EvaluateAsRelocatable(Ctx, Value)) + return false; + + switch (AUE->getOpcode()) { + case AsmUnaryExpr::LNot: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(!Value.getConstant()); + break; + case AsmUnaryExpr::Minus: + /// -(a - b + const) ==> (b - a - const) + if (Value.getSymA() && !Value.getSymA()) + return false; + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); + break; + case AsmUnaryExpr::Not: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(~Value.getConstant()); + break; + case AsmUnaryExpr::Plus: + Res = Value; + break; + } + + return true; + } + + case Binary: { + const AsmBinaryExpr *ABE = cast<AsmBinaryExpr>(this); + MCValue LHSValue, RHSValue; + + if (!ABE->getLHS()->EvaluateAsRelocatable(Ctx, LHSValue) || + !ABE->getRHS()->EvaluateAsRelocatable(Ctx, RHSValue)) + return false; + + // We only support a few operations on non-constant expressions, handle + // those first. + if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) { + switch (ABE->getOpcode()) { + default: + return false; + case AsmBinaryExpr::Sub: + // Negate RHS and add. + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymB(), RHSValue.getSymA(), + -RHSValue.getConstant(), + Res); + + case AsmBinaryExpr::Add: + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymA(), RHSValue.getSymB(), + RHSValue.getConstant(), + Res); + } + } + + // FIXME: We need target hooks for the evaluation. It may be limited in + // width, and gas defines the result of comparisons differently from Apple + // as (the result is sign extended). + int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); + int64_t Result = 0; + switch (ABE->getOpcode()) { + case AsmBinaryExpr::Add: Result = LHS + RHS; break; + case AsmBinaryExpr::And: Result = LHS & RHS; break; + case AsmBinaryExpr::Div: Result = LHS / RHS; break; + case AsmBinaryExpr::EQ: Result = LHS == RHS; break; + case AsmBinaryExpr::GT: Result = LHS > RHS; break; + case AsmBinaryExpr::GTE: Result = LHS >= RHS; break; + case AsmBinaryExpr::LAnd: Result = LHS && RHS; break; + case AsmBinaryExpr::LOr: Result = LHS || RHS; break; + case AsmBinaryExpr::LT: Result = LHS < RHS; break; + case AsmBinaryExpr::LTE: Result = LHS <= RHS; break; + case AsmBinaryExpr::Mod: Result = LHS % RHS; break; + case AsmBinaryExpr::Mul: Result = LHS * RHS; break; + case AsmBinaryExpr::NE: Result = LHS != RHS; break; + case AsmBinaryExpr::Or: Result = LHS | RHS; break; + case AsmBinaryExpr::Shl: Result = LHS << RHS; break; + case AsmBinaryExpr::Shr: Result = LHS >> RHS; break; + case AsmBinaryExpr::Sub: Result = LHS - RHS; break; + case AsmBinaryExpr::Xor: Result = LHS ^ RHS; break; + } + + Res = MCValue::get(Result); + return true; + } + } +} + diff --git a/tools/llvm-mc/AsmExpr.h b/tools/llvm-mc/AsmExpr.h new file mode 100644 index 000000000000..84e58ffd9bf0 --- /dev/null +++ b/tools/llvm-mc/AsmExpr.h @@ -0,0 +1,179 @@ +//===- AsmExpr.h - Assembly file expressions --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMEXPR_H +#define ASMEXPR_H + +#include "llvm/Support/Casting.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCContext; +class MCSymbol; +class MCValue; + +/// AsmExpr - Base class for the full range of assembler expressions which are +/// needed for parsing. +class AsmExpr { +public: + enum AsmExprKind { + Binary, ///< Binary expressions. + Constant, ///< Constant expressions. + SymbolRef, ///< References to labels and assigned expressions. + Unary ///< Unary expressions. + }; + +private: + AsmExprKind Kind; + +protected: + AsmExpr(AsmExprKind _Kind) : Kind(_Kind) {} + +public: + virtual ~AsmExpr(); + + AsmExprKind getKind() const { return Kind; } + + /// EvaluateAsAbsolute - Try to evaluate the expression to an absolute value. + /// + /// @param Res - The absolute value, if evaluation succeeds. + /// @result - True on success. + bool EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const; + + /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable + /// value, i.e. an expression of the fixed form (a - b + constant). + /// + /// @param Res - The relocatable value, if evaluation succeeds. + /// @result - True on success. + bool EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const; + + static bool classof(const AsmExpr *) { return true; } +}; + +//// AsmConstantExpr - Represent a constant integer expression. +class AsmConstantExpr : public AsmExpr { + int64_t Value; + +public: + AsmConstantExpr(int64_t _Value) + : AsmExpr(AsmExpr::Constant), Value(_Value) {} + + int64_t getValue() const { return Value; } + + static bool classof(const AsmExpr *E) { + return E->getKind() == AsmExpr::Constant; + } + static bool classof(const AsmConstantExpr *) { return true; } +}; + +/// AsmSymbolRefExpr - Represent a reference to a symbol from inside an +/// expression. +/// +/// A symbol reference in an expression may be a use of a label, a use of an +/// assembler variable (defined constant), or constitute an implicit definition +/// of the symbol as external. +class AsmSymbolRefExpr : public AsmExpr { + MCSymbol *Symbol; + +public: + AsmSymbolRefExpr(MCSymbol *_Symbol) + : AsmExpr(AsmExpr::SymbolRef), Symbol(_Symbol) {} + + MCSymbol *getSymbol() const { return Symbol; } + + static bool classof(const AsmExpr *E) { + return E->getKind() == AsmExpr::SymbolRef; + } + static bool classof(const AsmSymbolRefExpr *) { return true; } +}; + +/// AsmUnaryExpr - Unary assembler expressions. +class AsmUnaryExpr : public AsmExpr { +public: + enum Opcode { + LNot, ///< Logical negation. + Minus, ///< Unary minus. + Not, ///< Bitwise negation. + Plus ///< Unary plus. + }; + +private: + Opcode Op; + AsmExpr *Expr; + +public: + AsmUnaryExpr(Opcode _Op, AsmExpr *_Expr) + : AsmExpr(AsmExpr::Unary), Op(_Op), Expr(_Expr) {} + ~AsmUnaryExpr() { + delete Expr; + } + + Opcode getOpcode() const { return Op; } + + AsmExpr *getSubExpr() const { return Expr; } + + static bool classof(const AsmExpr *E) { + return E->getKind() == AsmExpr::Unary; + } + static bool classof(const AsmUnaryExpr *) { return true; } +}; + +/// AsmBinaryExpr - Binary assembler expressions. +class AsmBinaryExpr : public AsmExpr { +public: + enum Opcode { + Add, ///< Addition. + And, ///< Bitwise and. + Div, ///< Division. + EQ, ///< Equality comparison. + GT, ///< Greater than comparison. + GTE, ///< Greater than or equal comparison. + LAnd, ///< Logical and. + LOr, ///< Logical or. + LT, ///< Less than comparison. + LTE, ///< Less than or equal comparison. + Mod, ///< Modulus. + Mul, ///< Multiplication. + NE, ///< Inequality comparison. + Or, ///< Bitwise or. + Shl, ///< Bitwise shift left. + Shr, ///< Bitwise shift right. + Sub, ///< Subtraction. + Xor ///< Bitwise exclusive or. + }; + +private: + Opcode Op; + AsmExpr *LHS, *RHS; + +public: + AsmBinaryExpr(Opcode _Op, AsmExpr *_LHS, AsmExpr *_RHS) + : AsmExpr(AsmExpr::Binary), Op(_Op), LHS(_LHS), RHS(_RHS) {} + ~AsmBinaryExpr() { + delete LHS; + delete RHS; + } + + Opcode getOpcode() const { return Op; } + + /// getLHS - Get the left-hand side expression of the binary operator. + AsmExpr *getLHS() const { return LHS; } + + /// getRHS - Get the right-hand side expression of the binary operator. + AsmExpr *getRHS() const { return RHS; } + + static bool classof(const AsmExpr *E) { + return E->getKind() == AsmExpr::Binary; + } + static bool classof(const AsmBinaryExpr *) { return true; } +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp index db86825f3d28..7b744fbde65a 100644 --- a/tools/llvm-mc/AsmLexer.cpp +++ b/tools/llvm-mc/AsmLexer.cpp @@ -42,14 +42,15 @@ SMLoc AsmLexer::getLoc() const { return SMLoc::getFromPointer(TokStart); } -void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const { - SrcMgr.PrintMessage(Loc, Msg); +void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg, + const char *Type) const { + SrcMgr.PrintMessage(Loc, Msg, Type); } /// ReturnError - Set the error to the specified string at the specified /// location. This is defined to always return asmtok::Error. asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { - SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg); + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error"); return asmtok::Error; } @@ -109,8 +110,11 @@ asmtok::TokKind AsmLexer::LexPercent() { /// LexSlash: Slash: / /// C-Style Comment: /* ... */ asmtok::TokKind AsmLexer::LexSlash() { - if (*CurPtr != '*') - return asmtok::Slash; + switch (*CurPtr) { + case '*': break; // C style comment. + case '/': return ++CurPtr, LexLineComment(); + default: return asmtok::Slash; + } // C Style comment. ++CurPtr; // skip the star. @@ -129,8 +133,9 @@ asmtok::TokKind AsmLexer::LexSlash() { } } -/// LexHash: Comment: #[^\n]* -asmtok::TokKind AsmLexer::LexHash() { +/// LexLineComment: Comment: #[^\n]* +/// : //[^\n]* +asmtok::TokKind AsmLexer::LexLineComment() { int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) CurChar = getNextChar(); @@ -262,32 +267,43 @@ asmtok::TokKind AsmLexer::LexToken() { case '*': return asmtok::Star; case ',': return asmtok::Comma; case '$': return asmtok::Dollar; - case '=': return asmtok::Equal; - case '|': return asmtok::Pipe; + case '=': + if (*CurPtr == '=') + return ++CurPtr, asmtok::EqualEqual; + return asmtok::Equal; + case '|': + if (*CurPtr == '|') + return ++CurPtr, asmtok::PipePipe; + return asmtok::Pipe; case '^': return asmtok::Caret; - case '&': return asmtok::Amp; - case '!': return asmtok::Exclaim; + case '&': + if (*CurPtr == '&') + return ++CurPtr, asmtok::AmpAmp; + return asmtok::Amp; + case '!': + if (*CurPtr == '=') + return ++CurPtr, asmtok::ExclaimEqual; + return asmtok::Exclaim; case '%': return LexPercent(); case '/': return LexSlash(); - case '#': return LexHash(); + case '#': return LexLineComment(); case '"': return LexQuote(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexDigit(); case '<': - if (*CurPtr == '<') { - ++CurPtr; - return asmtok::LessLess; + switch (*CurPtr) { + case '<': return ++CurPtr, asmtok::LessLess; + case '=': return ++CurPtr, asmtok::LessEqual; + case '>': return ++CurPtr, asmtok::LessGreater; + default: return asmtok::Less; } - // Don't have any use for bare '<' yet. - return ReturnError(TokStart, "invalid character in input"); case '>': - if (*CurPtr == '>') { - ++CurPtr; - return asmtok::GreaterGreater; + switch (*CurPtr) { + case '>': return ++CurPtr, asmtok::GreaterGreater; + case '=': return ++CurPtr, asmtok::GreaterEqual; + default: return asmtok::Greater; } - // Don't have any use for bare '>' yet. - return ReturnError(TokStart, "invalid character in input"); // TODO: Quoted identifiers (objc methods etc) // local labels: [0-9][:] diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h index 19a1b38af433..6360b1280ce2 100644 --- a/tools/llvm-mc/AsmLexer.h +++ b/tools/llvm-mc/AsmLexer.h @@ -42,10 +42,12 @@ namespace asmtok { Plus, Minus, Tilde, Slash, // '/' LParen, RParen, - Star, Comma, Dollar, Equal, + Star, Comma, Dollar, Equal, EqualEqual, - Pipe, Caret, Amp, Exclaim, - Percent, LessLess, GreaterGreater + Pipe, PipePipe, Caret, + Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, + Less, LessEqual, LessLess, LessGreater, + Greater, GreaterEqual, GreaterGreater }; } @@ -95,7 +97,7 @@ public: SMLoc getLoc() const; - void PrintMessage(SMLoc Loc, const std::string &Msg) const; + void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; private: int getNextChar(); @@ -106,7 +108,7 @@ private: asmtok::TokKind LexIdentifier(); asmtok::TokKind LexPercent(); asmtok::TokKind LexSlash(); - asmtok::TokKind LexHash(); + asmtok::TokKind LexLineComment(); asmtok::TokKind LexDigit(); asmtok::TokKind LexQuote(); }; diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp index 2b697a66ad2a..f5bf58920120 100644 --- a/tools/llvm-mc/AsmParser.cpp +++ b/tools/llvm-mc/AsmParser.cpp @@ -12,20 +12,27 @@ //===----------------------------------------------------------------------===// #include "AsmParser.h" + +#include "AsmExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +void AsmParser::Warning(SMLoc L, const char *Msg) { + Lexer.PrintMessage(L, Msg, "warning"); +} + bool AsmParser::Error(SMLoc L, const char *Msg) { - Lexer.PrintMessage(L, Msg); + Lexer.PrintMessage(L, Msg, "error"); return true; } bool AsmParser::TokError(const char *Msg) { - Lexer.PrintMessage(Lexer.getLoc(), Msg); + Lexer.PrintMessage(Lexer.getLoc(), Msg, "error"); return true; } @@ -33,11 +40,18 @@ bool AsmParser::Run() { // Prime the lexer. Lexer.Lex(); - while (Lexer.isNot(asmtok::Eof)) - if (ParseStatement()) - return true; + bool HadError = false; - return false; + // While we have input, parse each statement. + while (Lexer.isNot(asmtok::Eof)) { + if (!ParseStatement()) continue; + + // If we had an error, remember it and recover by skipping to the next line. + HadError = true; + EatToEndOfStatement(); + } + + return HadError; } /// EatToEndOfStatement - Throw away the rest of the line for testing purposes. @@ -57,7 +71,7 @@ void AsmParser::EatToEndOfStatement() { /// /// parenexpr ::= expr) /// -bool AsmParser::ParseParenExpr(int64_t &Res) { +bool AsmParser::ParseParenExpr(AsmExpr *&Res) { if (ParseExpression(Res)) return true; if (Lexer.isNot(asmtok::RParen)) return TokError("expected ')' in parentheses expression"); @@ -70,28 +84,54 @@ bool AsmParser::ParseParenExpr(int64_t &Res) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::ParsePrimaryExpr(int64_t &Res) { +bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) { switch (Lexer.getKind()) { default: return TokError("unknown token in expression"); - case asmtok::Identifier: + case asmtok::Exclaim: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::LNot, Res); + return false; + case asmtok::Identifier: { // This is a label, this should be parsed as part of an expression, to - // handle things like LFOO+4 - Res = 0; // FIXME. + // handle things like LFOO+4. + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal()); + + // If this is use of an undefined symbol then mark it external. + if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) + Sym->setExternal(true); + + Res = new AsmSymbolRefExpr(Sym); Lexer.Lex(); // Eat identifier. return false; + } case asmtok::IntVal: - Res = Lexer.getCurIntVal(); + Res = new AsmConstantExpr(Lexer.getCurIntVal()); Lexer.Lex(); // Eat identifier. return false; case asmtok::LParen: Lexer.Lex(); // Eat the '('. return ParseParenExpr(Res); - case asmtok::Tilde: - case asmtok::Plus: case asmtok::Minus: Lexer.Lex(); // Eat the operator. - return ParsePrimaryExpr(Res); + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Minus, Res); + return false; + case asmtok::Plus: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Plus, Res); + return false; + case asmtok::Tilde: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Not, Res); + return false; } } @@ -102,59 +142,152 @@ bool AsmParser::ParsePrimaryExpr(int64_t &Res) { /// expr ::= expr *,/,%,<<,>> expr -> highest. /// expr ::= primaryexpr /// -bool AsmParser::ParseExpression(int64_t &Res) { +bool AsmParser::ParseExpression(AsmExpr *&Res) { + Res = 0; return ParsePrimaryExpr(Res) || ParseBinOpRHS(1, Res); } -static unsigned getBinOpPrecedence(asmtok::TokKind K) { +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + AsmExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsAbsolute(Ctx, Res)) + return Error(StartLoc, "expected absolute expression"); + + return false; +} + +bool AsmParser::ParseRelocatableExpression(MCValue &Res) { + AsmExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsRelocatable(Ctx, Res)) + return Error(StartLoc, "expected relocatable expression"); + + return false; +} + +bool AsmParser::ParseParenRelocatableExpression(MCValue &Res) { + AsmExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseParenExpr(Expr)) + return true; + + if (!Expr->EvaluateAsRelocatable(Ctx, Res)) + return Error(StartLoc, "expected relocatable expression"); + + return false; +} + +static unsigned getBinOpPrecedence(asmtok::TokKind K, + AsmBinaryExpr::Opcode &Kind) { switch (K) { default: return 0; // not a binop. + + // Lowest Precedence: &&, || + case asmtok::AmpAmp: + Kind = AsmBinaryExpr::LAnd; + return 1; + case asmtok::PipePipe: + Kind = AsmBinaryExpr::LOr; + return 1; + + // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= case asmtok::Plus: + Kind = AsmBinaryExpr::Add; + return 2; case asmtok::Minus: - return 1; + Kind = AsmBinaryExpr::Sub; + return 2; + case asmtok::EqualEqual: + Kind = AsmBinaryExpr::EQ; + return 2; + case asmtok::ExclaimEqual: + case asmtok::LessGreater: + Kind = AsmBinaryExpr::NE; + return 2; + case asmtok::Less: + Kind = AsmBinaryExpr::LT; + return 2; + case asmtok::LessEqual: + Kind = AsmBinaryExpr::LTE; + return 2; + case asmtok::Greater: + Kind = AsmBinaryExpr::GT; + return 2; + case asmtok::GreaterEqual: + Kind = AsmBinaryExpr::GTE; + return 2; + + // Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? case asmtok::Pipe: + Kind = AsmBinaryExpr::Or; + return 3; case asmtok::Caret: + Kind = AsmBinaryExpr::Xor; + return 3; case asmtok::Amp: - case asmtok::Exclaim: - return 2; + Kind = AsmBinaryExpr::And; + return 3; + + // Highest Precedence: *, /, %, <<, >> case asmtok::Star: + Kind = AsmBinaryExpr::Mul; + return 4; case asmtok::Slash: + Kind = AsmBinaryExpr::Div; + return 4; case asmtok::Percent: + Kind = AsmBinaryExpr::Mod; + return 4; case asmtok::LessLess: + Kind = AsmBinaryExpr::Shl; + return 4; case asmtok::GreaterGreater: - return 3; + Kind = AsmBinaryExpr::Shr; + return 4; } } /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. -bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) { +bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) { while (1) { - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind()); + AsmBinaryExpr::Opcode Kind = AsmBinaryExpr::Add; + unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. if (TokPrec < Precedence) return false; - //asmtok::TokKind BinOp = Lexer.getKind(); Lexer.Lex(); // Eat the next primary expression. - int64_t RHS; + AsmExpr *RHS; if (ParsePrimaryExpr(RHS)) return true; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. - unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind()); + AsmBinaryExpr::Opcode Dummy; + unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); if (TokPrec < NextTokPrec) { if (ParseBinOpRHS(Precedence+1, RHS)) return true; } - // Merge LHS/RHS: fixme use the right operator etc. - Res += RHS; + // Merge LHS and RHS according to operator. + Res = new AsmBinaryExpr(Kind, Res, RHS); } } @@ -183,16 +316,28 @@ bool AsmParser::ParseStatement() { // Consume the identifier, see what is after it. switch (Lexer.Lex()) { - case asmtok::Colon: + case asmtok::Colon: { // identifier ':' -> Label. Lexer.Lex(); + + // Diagnose attempt to use a variable as a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: This doesn't diagnose assignment to a symbol which has been + // implicitly marked as external. + MCSymbol *Sym = Ctx.GetOrCreateSymbol(IDVal); + if (Sym->getSection()) + return Error(IDLoc, "invalid symbol redefinition"); + if (Ctx.GetSymbolValue(Sym)) + return Error(IDLoc, "symbol already used as assembler variable"); // Since we saw a label, create a symbol and emit it. // FIXME: If the label starts with L it is an assembler temporary label. // Why does the client of this api need to know this? - Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal)); - + Out.EmitLabel(Sym); + return ParseStatement(); + } case asmtok::Equal: // identifier '=' ... -> assignment statement @@ -322,20 +467,66 @@ bool AsmParser::ParseStatement() { return ParseDirectiveValue(4); if (!strcmp(IDVal, ".quad")) return ParseDirectiveValue(8); - if (!strcmp(IDVal, ".fill")) - return ParseDirectiveFill(); + + // FIXME: Target hooks for IsPow2. + if (!strcmp(IDVal, ".align")) + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (!strcmp(IDVal, ".align32")) + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + if (!strcmp(IDVal, ".balign")) + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + if (!strcmp(IDVal, ".balignw")) + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + if (!strcmp(IDVal, ".balignl")) + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + if (!strcmp(IDVal, ".p2align")) + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (!strcmp(IDVal, ".p2alignw")) + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + if (!strcmp(IDVal, ".p2alignl")) + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + if (!strcmp(IDVal, ".org")) return ParseDirectiveOrg(); + + if (!strcmp(IDVal, ".fill")) + return ParseDirectiveFill(); if (!strcmp(IDVal, ".space")) return ParseDirectiveSpace(); - Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now"); + // Symbol attribute directives + if (!strcmp(IDVal, ".globl") || !strcmp(IDVal, ".global")) + return ParseDirectiveSymbolAttribute(MCStreamer::Global); + if (!strcmp(IDVal, ".hidden")) + return ParseDirectiveSymbolAttribute(MCStreamer::Hidden); + if (!strcmp(IDVal, ".indirect_symbol")) + return ParseDirectiveSymbolAttribute(MCStreamer::IndirectSymbol); + if (!strcmp(IDVal, ".internal")) + return ParseDirectiveSymbolAttribute(MCStreamer::Internal); + if (!strcmp(IDVal, ".lazy_reference")) + return ParseDirectiveSymbolAttribute(MCStreamer::LazyReference); + if (!strcmp(IDVal, ".no_dead_strip")) + return ParseDirectiveSymbolAttribute(MCStreamer::NoDeadStrip); + if (!strcmp(IDVal, ".private_extern")) + return ParseDirectiveSymbolAttribute(MCStreamer::PrivateExtern); + if (!strcmp(IDVal, ".protected")) + return ParseDirectiveSymbolAttribute(MCStreamer::Protected); + if (!strcmp(IDVal, ".reference")) + return ParseDirectiveSymbolAttribute(MCStreamer::Reference); + if (!strcmp(IDVal, ".weak")) + return ParseDirectiveSymbolAttribute(MCStreamer::Weak); + if (!strcmp(IDVal, ".weak_definition")) + return ParseDirectiveSymbolAttribute(MCStreamer::WeakDefinition); + if (!strcmp(IDVal, ".weak_reference")) + return ParseDirectiveSymbolAttribute(MCStreamer::WeakReference); + + Warning(IDLoc, "ignoring directive for now"); EatToEndOfStatement(); return false; } MCInst Inst; - if (ParseX86InstOperands(Inst)) + if (ParseX86InstOperands(IDVal, Inst)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -345,16 +536,18 @@ bool AsmParser::ParseStatement() { Lexer.Lex(); // Instruction is good, process it. - outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands() - << " operands.\n"; + Out.EmitInstruction(Inst); // Skip to end of line for now. return false; } bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { - int64_t Value; - if (ParseExpression(Value)) + // FIXME: Use better location, we should use proper tokens. + SMLoc EqualLoc = Lexer.getLoc(); + + MCValue Value; + if (ParseRelocatableExpression(Value)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -363,10 +556,21 @@ bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { // Eat the end of statement marker. Lexer.Lex(); - // Get the symbol for this name. + // Diagnose assignment to a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: This doesn't diagnose assignment to a symbol which has been + // implicitly marked as external. // FIXME: Handle '.'. + // FIXME: Diagnose assignment to protected identifier (e.g., register name). MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); - Out.EmitAssignment(Sym, MCValue::get(Value), IsDotSet); + if (Sym->getSection()) + return Error(EqualLoc, "invalid assignment to symbol emitted as a label"); + if (Sym->isExternal()) + return Error(EqualLoc, "invalid assignment to external symbol"); + + // Do the assignment. + Out.EmitAssignment(Sym, Value, IsDotSet); return false; } @@ -433,7 +637,7 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Section, } /// ParseDirectiveAscii: -/// ::= ( .ascii | .asciiz ) [ "string" ( , "string" )* ] +/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { if (Lexer.isNot(asmtok::EndOfStatement)) { for (;;) { @@ -468,11 +672,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { bool AsmParser::ParseDirectiveValue(unsigned Size) { if (Lexer.isNot(asmtok::EndOfStatement)) { for (;;) { - int64_t Expr; - if (ParseExpression(Expr)) + MCValue Expr; + if (ParseRelocatableExpression(Expr)) return true; - Out.EmitValue(MCValue::get(Expr), Size); + Out.EmitValue(Expr, Size); if (Lexer.is(asmtok::EndOfStatement)) break; @@ -492,7 +696,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { /// ::= .space expression [ , expression ] bool AsmParser::ParseDirectiveSpace() { int64_t NumBytes; - if (ParseExpression(NumBytes)) + if (ParseAbsoluteExpression(NumBytes)) return true; int64_t FillExpr = 0; @@ -502,7 +706,7 @@ bool AsmParser::ParseDirectiveSpace() { return TokError("unexpected token in '.space' directive"); Lexer.Lex(); - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; HasFillExpr = true; @@ -527,7 +731,7 @@ bool AsmParser::ParseDirectiveSpace() { /// ::= .fill expression , expression , expression bool AsmParser::ParseDirectiveFill() { int64_t NumValues; - if (ParseExpression(NumValues)) + if (ParseAbsoluteExpression(NumValues)) return true; if (Lexer.isNot(asmtok::Comma)) @@ -535,7 +739,7 @@ bool AsmParser::ParseDirectiveFill() { Lexer.Lex(); int64_t FillSize; - if (ParseExpression(FillSize)) + if (ParseAbsoluteExpression(FillSize)) return true; if (Lexer.isNot(asmtok::Comma)) @@ -543,7 +747,7 @@ bool AsmParser::ParseDirectiveFill() { Lexer.Lex(); int64_t FillExpr; - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -563,8 +767,8 @@ bool AsmParser::ParseDirectiveFill() { /// ParseDirectiveOrg /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { - int64_t Offset; - if (ParseExpression(Offset)) + MCValue Offset; + if (ParseRelocatableExpression(Offset)) return true; // Parse optional fill expression. @@ -574,7 +778,7 @@ bool AsmParser::ParseDirectiveOrg() { return TokError("unexpected token in '.org' directive"); Lexer.Lex(); - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -582,8 +786,113 @@ bool AsmParser::ParseDirectiveOrg() { } Lexer.Lex(); - - Out.EmitValueToOffset(MCValue::get(Offset), FillExpr); + + // FIXME: Only limited forms of relocatable expressions are accepted here, it + // has to be relative to the current section. + Out.EmitValueToOffset(Offset, FillExpr); + + return false; +} + +/// ParseDirectiveAlign +/// ::= {.align, ...} expression [ , expression [ , expression ]] +bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + int64_t Alignment; + if (ParseAbsoluteExpression(Alignment)) + return true; + + SMLoc MaxBytesLoc; + bool HasFillExpr = false; + int64_t FillExpr = 0; + int64_t MaxBytesToFill = 0; + if (Lexer.isNot(asmtok::EndOfStatement)) { + if (Lexer.isNot(asmtok::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + // The fill expression can be omitted while specifying a maximum number of + // alignment bytes, e.g: + // .align 3,,4 + if (Lexer.isNot(asmtok::Comma)) { + HasFillExpr = true; + if (ParseAbsoluteExpression(FillExpr)) + return true; + } + + if (Lexer.isNot(asmtok::EndOfStatement)) { + if (Lexer.isNot(asmtok::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + + MaxBytesLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(MaxBytesToFill)) + return true; + + if (Lexer.isNot(asmtok::EndOfStatement)) + return TokError("unexpected token in directive"); + } + } + + Lexer.Lex(); + + if (!HasFillExpr) { + // FIXME: Sometimes fill with nop. + FillExpr = 0; + } + + // Compute alignment in bytes. + if (IsPow2) { + // FIXME: Diagnose overflow. + Alignment = 1 << Alignment; + } + + // Diagnose non-sensical max bytes to fill. + if (MaxBytesLoc.isValid()) { + if (MaxBytesToFill < 1) { + Warning(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring"); + return false; + } + + if (MaxBytesToFill >= Alignment) { + Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " + "has no effect"); + MaxBytesToFill = 0; + } + } + + // FIXME: Target specific behavior about how the "extra" bytes are filled. + Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); return false; } + +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { + if (Lexer.isNot(asmtok::EndOfStatement)) { + for (;;) { + if (Lexer.isNot(asmtok::Identifier)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Lexer.getCurStrVal()); + Lexer.Lex(); + + // If this is use of an undefined symbol then mark it external. + if (!Sym->getSection() && !Ctx.GetSymbolValue(Sym)) + Sym->setExternal(true); + + Out.EmitSymbolAttribute(Sym, Attr); + + if (Lexer.is(asmtok::EndOfStatement)) + break; + + if (Lexer.isNot(asmtok::Comma)) + return TokError("unexpected token in directive"); + Lexer.Lex(); + } + } + + Lexer.Lex(); + return false; +} diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h index da256c275ca3..333b284eac91 100644 --- a/tools/llvm-mc/AsmParser.h +++ b/tools/llvm-mc/AsmParser.h @@ -15,19 +15,24 @@ #define ASMPARSER_H #include "AsmLexer.h" +#include "llvm/MC/MCStreamer.h" namespace llvm { +class AsmExpr; class MCContext; class MCInst; class MCStreamer; - +class MCValue; + class AsmParser { +public: + struct X86Operand; + +private: AsmLexer Lexer; MCContext &Ctx; MCStreamer &Out; - struct X86Operand; - public: AsmParser(SourceMgr &SM, MCContext &ctx, MCStreamer &OutStr) : Lexer(SM), Ctx(ctx), Out(OutStr) {} @@ -37,22 +42,56 @@ public: private: bool ParseStatement(); - + + void Warning(SMLoc L, const char *Msg); bool Error(SMLoc L, const char *Msg); bool TokError(const char *Msg); void EatToEndOfStatement(); bool ParseAssignment(const char *Name, bool IsDotSet); - bool ParseExpression(int64_t &Res); - bool ParsePrimaryExpr(int64_t &Res); - bool ParseBinOpRHS(unsigned Precedence, int64_t &Res); - bool ParseParenExpr(int64_t &Res); + + /// ParseExpression - Parse a general assembly expression. + /// + /// @param Res - The resulting expression. The pointer value is null on error. + /// @result - False on success. + bool ParseExpression(AsmExpr *&Res); + + /// ParseAbsoluteExpression - Parse an expression which must evaluate to an + /// absolute value. + /// + /// @param Res - The value of the absolute expression. The result is undefined + /// on error. + /// @result - False on success. + bool ParseAbsoluteExpression(int64_t &Res); + + /// ParseRelocatableExpression - Parse an expression which must be + /// relocatable. + /// + /// @param Res - The relocatable expression value. The result is undefined on + /// error. + /// @result - False on success. + bool ParseRelocatableExpression(MCValue &Res); + + /// ParseParenRelocatableExpression - Parse an expression which must be + /// relocatable, assuming that an initial '(' has already been consumed. + /// + /// @param Res - The relocatable expression value. The result is undefined on + /// error. + /// @result - False on success. + /// + /// @see ParseRelocatableExpression, ParseParenExpr. + bool ParseParenRelocatableExpression(MCValue &Res); + + bool ParsePrimaryExpr(AsmExpr *&Res); + bool ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res); + bool ParseParenExpr(AsmExpr *&Res); // X86 specific. - bool ParseX86InstOperands(MCInst &Inst); + bool ParseX86InstOperands(const char *InstName, MCInst &Inst); bool ParseX86Operand(X86Operand &Op); bool ParseX86MemOperand(X86Operand &Op); + bool ParseX86Register(X86Operand &Op); // Directive Parsing. bool ParseDirectiveDarwinSection(); // Darwin specific ".section". @@ -64,6 +103,12 @@ private: bool ParseDirectiveSpace(); // ".space" bool ParseDirectiveSet(); // ".set" bool ParseDirectiveOrg(); // ".org" + // ".align{,32}", ".p2align{,w,l}" + bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which + /// accepts a single symbol (which should be a label or an external). + bool ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr); }; diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt index 2dd878d6e467..b21a4b1b9189 100644 --- a/tools/llvm-mc/CMakeLists.txt +++ b/tools/llvm-mc/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS support MC) add_llvm_tool(llvm-mc llvm-mc.cpp + AsmExpr.cpp AsmLexer.cpp AsmParser.cpp MC-X86Specific.cpp diff --git a/tools/llvm-mc/MC-X86Specific.cpp b/tools/llvm-mc/MC-X86Specific.cpp index 45774cf48c8b..fec13ce5ab59 100644 --- a/tools/llvm-mc/MC-X86Specific.cpp +++ b/tools/llvm-mc/MC-X86Specific.cpp @@ -14,6 +14,7 @@ #include "AsmParser.h" #include "llvm/MC/MCInst.h" +#include "llvm/Support/SourceMgr.h" using namespace llvm; /// X86Operand - Instances of this class represent one X86 machine instruction. @@ -30,78 +31,91 @@ struct AsmParser::X86Operand { } Reg; struct { - // FIXME: Should be a general expression. - int64_t Val; + MCValue Val; } Imm; struct { unsigned SegReg; - int64_t Disp; // FIXME: Should be a general expression. + MCValue Disp; unsigned BaseReg; + unsigned IndexReg; unsigned Scale; - unsigned ScaleReg; } Mem; }; + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNo; + } + static X86Operand CreateReg(unsigned RegNo) { X86Operand Res; Res.Kind = Register; Res.Reg.RegNo = RegNo; return Res; } - static X86Operand CreateImm(int64_t Val) { + static X86Operand CreateImm(MCValue Val) { X86Operand Res; Res.Kind = Immediate; Res.Imm.Val = Val; return Res; } - static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg, - unsigned Scale, unsigned ScaleReg) { + static X86Operand CreateMem(unsigned SegReg, MCValue Disp, unsigned BaseReg, + unsigned IndexReg, unsigned Scale) { + // If there is no index register, we should never have a scale, and we + // should always have a scale (in {1,2,4,8}) if we do. + assert(((Scale == 0 && !IndexReg) || + (IndexReg && (Scale == 1 || Scale == 2 || + Scale == 4 || Scale == 8))) && + "Invalid scale!"); X86Operand Res; Res.Kind = Memory; Res.Mem.SegReg = SegReg; Res.Mem.Disp = Disp; Res.Mem.BaseReg = BaseReg; + Res.Mem.IndexReg = IndexReg; Res.Mem.Scale = Scale; - Res.Mem.ScaleReg = ScaleReg; return Res; } - - void AddToMCInst(MCInst &I) { - // FIXME: Add in x86 order here. - } }; +bool AsmParser::ParseX86Register(X86Operand &Op) { + assert(Lexer.getKind() == asmtok::Register && "Invalid token kind!"); + + // FIXME: Decode register number. + Op = X86Operand::CreateReg(123); + Lexer.Lex(); // Eat register token. + + return false; +} + bool AsmParser::ParseX86Operand(X86Operand &Op) { switch (Lexer.getKind()) { default: return ParseX86MemOperand(Op); case asmtok::Register: - // FIXME: Decode reg #. // FIXME: if a segment register, this could either be just the seg reg, or // the start of a memory operand. - Op = X86Operand::CreateReg(123); - Lexer.Lex(); // Eat register. - return false; + return ParseX86Register(Op); case asmtok::Dollar: { // $42 -> immediate. Lexer.Lex(); - int64_t Val; - if (ParseExpression(Val)) - return TokError("expected integer constant"); - Op = X86Operand::CreateReg(Val); + MCValue Val; + if (ParseRelocatableExpression(Val)) + return true; + Op = X86Operand::CreateImm(Val); return false; - case asmtok::Star: + } + case asmtok::Star: { Lexer.Lex(); // Eat the star. if (Lexer.is(asmtok::Register)) { - Op = X86Operand::CreateReg(123); - Lexer.Lex(); // Eat register. + if (ParseX86Register(Op)) + return true; } else if (ParseX86MemOperand(Op)) return true; - // FIXME: Note that these are 'dereferenced' so that clients know the '*' is - // there. + // FIXME: Note the '*' in the operand for use by the matcher. return false; } } @@ -116,9 +130,9 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the ( and see what is after // it. - int64_t Disp = 0; + MCValue Disp = MCValue::get(0, 0, 0); if (Lexer.isNot(asmtok::LParen)) { - if (ParseExpression(Disp)) return true; + if (ParseRelocatableExpression(Disp)) return true; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -139,8 +153,7 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { // memory operand consumed. } else { // It must be an parenthesized expression, parse it now. - if (ParseParenExpr(Disp) || - ParseBinOpRHS(1, Disp)) + if (ParseParenRelocatableExpression(Disp)) return true; // After parsing the base expression we could either have a parenthesized @@ -157,33 +170,57 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. - unsigned BaseReg = 0, ScaleReg = 0, Scale = 0; + unsigned BaseReg = 0, IndexReg = 0, Scale = 0; if (Lexer.is(asmtok::Register)) { - BaseReg = 123; // FIXME: decode reg # - Lexer.Lex(); // eat the register. + if (ParseX86Register(Op)) + return true; + BaseReg = Op.getReg(); } if (Lexer.is(asmtok::Comma)) { - Lexer.Lex(); // eat the comma. - + Lexer.Lex(); // Eat the comma. + + // Following the comma we should have either an index register, or a scale + // value. We don't support the later form, but we want to parse it + // correctly. + // + // Not that even though it would be completely consistent to support syntax + // like "1(%eax,,1)", the assembler doesn't. if (Lexer.is(asmtok::Register)) { - ScaleReg = 123; // FIXME: decode reg # - Lexer.Lex(); // eat the register. + if (ParseX86Register(Op)) + return true; + IndexReg = Op.getReg(); Scale = 1; // If not specified, the scale defaults to 1. - } - if (Lexer.is(asmtok::Comma)) { - Lexer.Lex(); // eat the comma. - - // If present, get and validate scale amount. - if (Lexer.is(asmtok::IntVal)) { - int64_t ScaleVal = Lexer.getCurIntVal(); - if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) - return TokError("scale factor in address must be 1, 2, 4 or 8"); - Lexer.Lex(); // eat the scale. - Scale = (unsigned)ScaleVal; + if (Lexer.isNot(asmtok::RParen)) { + // Parse the scale amount: + // ::= ',' [scale-expression] + if (Lexer.isNot(asmtok::Comma)) + return true; + Lexer.Lex(); // Eat the comma. + + if (Lexer.isNot(asmtok::RParen)) { + int64_t ScaleVal; + if (ParseAbsoluteExpression(ScaleVal)) + return true; + + // Validate the scale amount. + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) + return TokError("scale factor in address must be 1, 2, 4 or 8"); + Scale = (unsigned)ScaleVal; + } } + } else if (Lexer.isNot(asmtok::RParen)) { + // Otherwise we have the unsupported form of a scale amount without an + // index. + SMLoc Loc = Lexer.getLoc(); + + int64_t Value; + if (ParseAbsoluteExpression(Value)) + return true; + + return Error(Loc, "cannot have scale factor without index register"); } } @@ -192,31 +229,38 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { return TokError("unexpected token in memory operand"); Lexer.Lex(); // Eat the ')'. - Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg); + Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale); + return false; +} + +/// MatchX86Inst - Convert a parsed instruction name and operand list into a +/// concrete instruction. +static bool MatchX86Inst(const char *Name, + llvm::SmallVector<AsmParser::X86Operand, 3> &Operands, + MCInst &Inst) { return false; } /// ParseX86InstOperands - Parse the operands of an X86 instruction and return /// them as the operands of an MCInst. -bool AsmParser::ParseX86InstOperands(MCInst &Inst) { - // If no operands are present, just return. - if (Lexer.is(asmtok::EndOfStatement)) - return false; +bool AsmParser::ParseX86InstOperands(const char *InstName, MCInst &Inst) { + llvm::SmallVector<X86Operand, 3> Operands; - // Read the first operand. - X86Operand Op; - if (ParseX86Operand(Op)) - return true; - Op.AddToMCInst(Inst); - - while (Lexer.is(asmtok::Comma)) { - Lexer.Lex(); // Eat the comma. - - // Parse and remember the operand. - Op = X86Operand(); - if (ParseX86Operand(Op)) + if (Lexer.isNot(asmtok::EndOfStatement)) { + // Read the first operand. + Operands.push_back(X86Operand()); + if (ParseX86Operand(Operands.back())) return true; - Op.AddToMCInst(Inst); + + while (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // Eat the comma. + + // Parse and remember the operand. + Operands.push_back(X86Operand()); + if (ParseX86Operand(Operands.back())) + return true; + } } - return false; + + return MatchX86Inst(InstName, Operands, Inst); } diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 4100cb14de10..b52edd1ed434 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -80,7 +80,7 @@ static int AsLexInput(const char *ProgName) { while (Tok != asmtok::Eof) { switch (Tok) { default: - Lexer.PrintMessage(Lexer.getLoc(), "driver: unknown token"); + Lexer.PrintMessage(Lexer.getLoc(), "unknown token", "warning"); Error = true; break; case asmtok::Error: @@ -98,17 +98,36 @@ static int AsLexInput(const char *ProgName) { case asmtok::IntVal: outs() << "int: " << Lexer.getCurIntVal() << '\n'; break; + + case asmtok::Amp: outs() << "Amp\n"; break; + case asmtok::AmpAmp: outs() << "AmpAmp\n"; break; + case asmtok::Caret: outs() << "Caret\n"; break; + case asmtok::Colon: outs() << "Colon\n"; break; + case asmtok::Comma: outs() << "Comma\n"; break; + case asmtok::Dollar: outs() << "Dollar\n"; break; case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break; - case asmtok::Colon: outs() << "Colon\n"; break; - case asmtok::Plus: outs() << "Plus\n"; break; - case asmtok::Minus: outs() << "Minus\n"; break; - case asmtok::Tilde: outs() << "Tilde\n"; break; - case asmtok::Slash: outs() << "Slash\n"; break; - case asmtok::LParen: outs() << "LParen\n"; break; - case asmtok::RParen: outs() << "RParen\n"; break; - case asmtok::Star: outs() << "Star\n"; break; - case asmtok::Comma: outs() << "Comma\n"; break; - case asmtok::Dollar: outs() << "Dollar\n"; break; + case asmtok::Eof: outs() << "Eof\n"; break; + case asmtok::Equal: outs() << "Equal\n"; break; + case asmtok::EqualEqual: outs() << "EqualEqual\n"; break; + case asmtok::Exclaim: outs() << "Exclaim\n"; break; + case asmtok::ExclaimEqual: outs() << "ExclaimEqual\n"; break; + case asmtok::Greater: outs() << "Greater\n"; break; + case asmtok::GreaterEqual: outs() << "GreaterEqual\n"; break; + case asmtok::GreaterGreater: outs() << "GreaterGreater\n"; break; + case asmtok::LParen: outs() << "LParen\n"; break; + case asmtok::Less: outs() << "Less\n"; break; + case asmtok::LessEqual: outs() << "LessEqual\n"; break; + case asmtok::LessGreater: outs() << "LessGreater\n"; break; + case asmtok::LessLess: outs() << "LessLess\n"; break; + case asmtok::Minus: outs() << "Minus\n"; break; + case asmtok::Percent: outs() << "Percent\n"; break; + case asmtok::Pipe: outs() << "Pipe\n"; break; + case asmtok::PipePipe: outs() << "PipePipe\n"; break; + case asmtok::Plus: outs() << "Plus\n"; break; + case asmtok::RParen: outs() << "RParen\n"; break; + case asmtok::Slash: outs() << "Slash\n"; break; + case asmtok::Star: outs() << "Star\n"; break; + case asmtok::Tilde: outs() << "Tilde\n"; break; } Tok = Lexer.Lex(); |