summaryrefslogtreecommitdiff
path: root/tools/llvm-mc
diff options
context:
space:
mode:
Diffstat (limited to 'tools/llvm-mc')
-rw-r--r--tools/llvm-mc/AsmLexer.cpp258
-rw-r--r--tools/llvm-mc/AsmLexer.h109
-rw-r--r--tools/llvm-mc/AsmParser.cpp351
-rw-r--r--tools/llvm-mc/AsmParser.h48
-rw-r--r--tools/llvm-mc/CMakeLists.txt7
-rw-r--r--tools/llvm-mc/Makefile17
-rw-r--r--tools/llvm-mc/llvm-mc.cpp161
7 files changed, 951 insertions, 0 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
new file mode 100644
index 000000000000..0828594a35b5
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -0,0 +1,258 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+/// Construct a lexer that starts reading at the beginning of the SourceMgr
+/// buffer with index 0.  No token is current until Lex() has been called.
+AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+  CurBuffer = 0;
+  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+  CurPtr = CurBuf->getBufferStart();
+  TokStart = 0;
+
+  // Give the per-token state defined values so that getKind()/is()/isNot()
+  // do not read indeterminate memory if queried before the first Lex().
+  CurKind = asmtok::Error;
+  CurIntVal = 0;
+}
+
+/// getLoc - Return the source location of the first character of the token
+/// most recently started by LexToken().
+SMLoc AsmLexer::getLoc() const {
+  const char *Start = TokStart;
+  return SMLoc::getFromPointer(Start);
+}
+
+/// PrintMessage - Hand the diagnostic text Msg, attached to location Loc, to
+/// the SourceMgr this lexer reads from.
+void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const {
+  SrcMgr.PrintMessage(Loc, Msg);
+}
+
+/// ReturnError - Print Msg as a diagnostic at the buffer position Loc.  The
+/// result is always asmtok::Error, which lets lexing routines report and bail
+/// out with a single `return ReturnError(...)`.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+  SMLoc Where = SMLoc::getFromPointer(Loc);
+  SrcMgr.PrintMessage(Where, Msg);
+  return asmtok::Error;
+}
+
+/// getNextChar - Consume and return the next character of the input.
+///
+/// Ordinary characters are returned as non-negative values.  A nul byte that
+/// is not the end of the current buffer is returned as 0.  At the end of an
+/// included buffer, lexing resumes in the parent buffer at the include
+/// location.  At the end of the outermost buffer EOF is returned and the
+/// read position is left on the terminator, so further calls keep returning
+/// EOF.
+int AsmLexer::getNextChar() {
+  const char Ch = *CurPtr++;
+  if (Ch != 0)
+    return (unsigned char)Ch;
+
+  // A nul that is not sitting at the buffer end is just a stray nul in the
+  // file; callers treat it as whitespace.
+  if (CurPtr-1 != CurBuf->getBufferEnd())
+    return 0;
+
+  // End of an included file: switch back to the including buffer and carry
+  // on from the point of inclusion.
+  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+  if (ParentIncludeLoc != SMLoc()) {
+    CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+    CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+    CurPtr = ParentIncludeLoc.getPointer();
+    return getNextChar();
+  }
+
+  // End of the top-level file.  Step back onto the terminator so that the
+  // next call reports EOF again.
+  --CurPtr;
+  return EOF;
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+///
+/// Called by LexToken() once the first identifier character has been
+/// consumed.  Extends the token over every following identifier character,
+/// stores the complete spelling in CurStrVal and returns asmtok::Identifier.
+asmtok::TokKind AsmLexer::LexIdentifier() {
+  // The <cctype> classifiers have undefined behavior for negative arguments
+  // and plain char may be signed, so convert each byte to unsigned char.
+  while (isalnum((unsigned char)*CurPtr) || *CurPtr == '_' ||
+         *CurPtr == '$' || *CurPtr == '.' || *CurPtr == '@')
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);   // The whole identifier.
+  return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+///
+/// Called by LexToken() after the '%' has been consumed.  Requires at least
+/// one alphanumeric character to follow; otherwise an "invalid register name"
+/// diagnostic is printed and asmtok::Error is returned.  On success CurStrVal
+/// holds the register spelling including the leading '%'.
+asmtok::TokKind AsmLexer::LexPercent() {
+  // Convert to unsigned char before calling isalnum: passing a negative
+  // char value to the <cctype> classifiers is undefined behavior.
+  if (!isalnum((unsigned char)*CurPtr))
+    return ReturnError(TokStart, "invalid register name");
+  while (isalnum((unsigned char)*CurPtr))
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);   // Includes the leading '%'.
+  return asmtok::Register;
+}
+
+/// LexSlash: Slash: /
+///           C-Style Comment: /* ... */
+///
+/// Called by LexToken() after the '/' has been consumed.  A lone '/' yields
+/// asmtok::Slash.  A comment is skipped and the token that follows it is
+/// returned; a comment that runs into end of file is reported as an error.
+asmtok::TokKind AsmLexer::LexSlash() {
+  const bool StartsComment = (*CurPtr == '*');
+  if (!StartsComment)
+    return asmtok::Slash;
+
+  ++CurPtr;  // Step over the '*' that opens the comment.
+
+  for (;;) {
+    const int Ch = getNextChar();
+
+    if (Ch == EOF)
+      return ReturnError(TokStart, "unterminated comment");
+
+    // A '*' only closes the comment when a '/' comes directly after it.
+    if (Ch == '*' && CurPtr[0] == '/') {
+      ++CurPtr;  // Consume the '/' of the closing "*/".
+      return LexToken();
+    }
+  }
+}
+
+/// LexHash: Comment: #[^\n\r]*
+///
+/// Called by LexToken() after the '#' has been consumed.  Discards the rest
+/// of the line, including the line terminator that ends it.  Returns
+/// asmtok::EndOfStatement for a terminated comment, or asmtok::Eof when the
+/// comment runs to the end of the input.
+asmtok::TokKind AsmLexer::LexHash() {
+  int CurChar = getNextChar();
+  // Stop at either kind of line terminator.  LexToken() treats both '\n' and
+  // '\r' as end of statement, so a comment must end at either one as well.
+  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return asmtok::Eof;
+  return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9].
+///   Local Label: [0-9][:]
+///   Forward/Backward Label: [0-9][fb]
+///   Binary integer: 0b[01]+
+///   Octal integer: 0[0-7]+
+///   Hex integer: 0x[0-9a-fA-F]+
+///   Decimal integer: [1-9][0-9]*
+///   TODO: FP literal.
+///
+/// Called by LexToken() after the first digit has been consumed, so
+/// CurPtr[-1] is that digit.  On success the value is stored in CurIntVal and
+/// asmtok::IntVal is returned.  Local and directional labels are recognized
+/// but not implemented yet and produce an error.
+asmtok::TokKind AsmLexer::LexDigit() {
+  if (*CurPtr == ':')
+    return ReturnError(TokStart, "FIXME: local label not implemented");
+
+  // "0b" is ambiguous: it may start a binary literal or be a backward
+  // reference to local label 0.  Treat it as a literal only when a binary
+  // digit follows; otherwise the directional-label check below would make the
+  // binary-literal case unreachable.  Reading CurPtr[1] is safe because
+  // CurPtr[0] is 'b', i.e. not the buffer terminator.
+  bool IsBinaryLiteral = CurPtr[-1] == '0' && CurPtr[0] == 'b' &&
+                         (CurPtr[1] == '0' || CurPtr[1] == '1');
+
+  if (!IsBinaryLiteral && (*CurPtr == 'f' || *CurPtr == 'b'))
+    return ReturnError(TokStart, "FIXME: directional label not implemented");
+
+  // Decimal integer: [1-9][0-9]*
+  // (unsigned char casts: the <cctype> classifiers are undefined for
+  // negative arguments.)
+  if (CurPtr[-1] != '0') {
+    while (isdigit((unsigned char)*CurPtr))
+      ++CurPtr;
+    CurIntVal = strtoll(TokStart, 0, 10);
+    return asmtok::IntVal;
+  }
+
+  if (IsBinaryLiteral) {
+    ++CurPtr;  // Skip the 'b'.
+    const char *NumStart = CurPtr;
+    // At least one binary digit is guaranteed by IsBinaryLiteral.
+    while (CurPtr[0] == '0' || CurPtr[0] == '1')
+      ++CurPtr;
+    CurIntVal = strtoll(NumStart, 0, 2);
+    return asmtok::IntVal;
+  }
+
+  if (*CurPtr == 'x') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (isxdigit((unsigned char)CurPtr[0]))
+      ++CurPtr;
+
+    // Requires at least one hex digit.
+    if (CurPtr == NumStart)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+
+    // Try a signed parse first.  If the value does not fit in a signed 64-bit
+    // integer, reparse it as unsigned and keep the bit pattern.  Errors are
+    // reported at the start of the literal.
+    errno = 0;
+    CurIntVal = strtoll(NumStart, 0, 16);
+    if (errno == EINVAL)
+      return ReturnError(TokStart, "Invalid hexadecimal number");
+    if (errno == ERANGE) {
+      errno = 0;
+      CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+      if (errno == EINVAL)
+        return ReturnError(TokStart, "Invalid hexadecimal number");
+      if (errno == ERANGE)
+        return ReturnError(TokStart, "Hexadecimal number out of range");
+    }
+    return asmtok::IntVal;
+  }
+
+  // Must be an octal number, it starts with 0.
+  while (*CurPtr >= '0' && *CurPtr <= '7')
+    ++CurPtr;
+  CurIntVal = strtoll(TokStart, 0, 8);
+  return asmtok::IntVal;
+}
+
+/// LexQuote: String: "..."
+///
+/// Called by LexToken() after the opening quote has been consumed.  Scans to
+/// the matching closing quote; a backslash makes the character after it part
+/// of the string, so \" does not end it.  CurStrVal receives the text with
+/// both quotes.  Hitting end of file first is reported as an error.
+asmtok::TokKind AsmLexer::LexQuote() {
+  // TODO: does gas allow multiline string constants?
+  for (int Ch = getNextChar(); Ch != '"'; Ch = getNextChar()) {
+    // After a backslash, look at the escaped character instead so that an
+    // escaped quote cannot terminate the loop.
+    if (Ch == '\\')
+      Ch = getNextChar();
+
+    if (Ch == EOF)
+      return ReturnError(TokStart, "unterminated string constant");
+  }
+
+  CurStrVal.assign(TokStart, CurPtr);   // include quotes.
+  return asmtok::String;
+}
+
+
+/// LexToken - Lex the next token from the input and return its kind.
+///
+/// Sets TokStart to the first character of the token.  Blanks, tabs and
+/// stray nul bytes are skipped.  Newline, carriage return and ';' all produce
+/// asmtok::EndOfStatement.  Characters that cannot start any token produce a
+/// diagnostic and asmtok::Error.
+asmtok::TokKind AsmLexer::LexToken() {
+  int CurChar;
+
+  // Skip whitespace with a loop rather than one recursive call per
+  // character, so a long run of blanks cannot exhaust the stack.
+  do {
+    TokStart = CurPtr;
+    // Consumes one character, except at end of file where the read position
+    // stays put.
+    CurChar = getNextChar();
+  } while (CurChar == 0 || CurChar == ' ' || CurChar == '\t');
+
+  switch (CurChar) {
+  default:
+    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+      return LexIdentifier();
+
+    // Unknown character, emit an error.
+    return ReturnError(TokStart, "invalid character in input");
+  case EOF: return asmtok::Eof;
+  case '\n': // FALL THROUGH.
+  case '\r': // FALL THROUGH.
+  case ';': return asmtok::EndOfStatement;
+  case ':': return asmtok::Colon;
+  case '+': return asmtok::Plus;
+  case '-': return asmtok::Minus;
+  case '~': return asmtok::Tilde;
+  case '(': return asmtok::LParen;
+  case ')': return asmtok::RParen;
+  case '*': return asmtok::Star;
+  case ',': return asmtok::Comma;
+  case '$': return asmtok::Dollar;
+  case '%': return LexPercent();
+  case '/': return LexSlash();
+  case '#': return LexHash();
+  case '"': return LexQuote();
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    return LexDigit();
+
+  // TODO: Quoted identifiers (objc methods etc)
+  // fp constants, character constants.
+  }
+}
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
new file mode 100644
index 000000000000..a6c93230c6cd
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.h
@@ -0,0 +1,109 @@
+//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMLEXER_H
+#define ASMLEXER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class SMLoc;
+
+/// asmtok - Token kinds produced by AsmLexer.
+namespace asmtok {
+  enum TokKind {
+    // Markers
+    Eof,
+    Error,
+
+    // Tokens that carry a string value.
+    Identifier,
+    Register,
+    String,
+
+    // Tokens that carry an integer value.
+    IntVal,
+
+    // Tokens that carry no value.
+    EndOfStatement,
+    Colon,
+    Plus,
+    Minus,
+    Tilde,
+    Slash,    // '/'
+    LParen,
+    RParen,
+    Star,
+    Comma,
+    Dollar
+  };
+}
+
+/// AsmLexer - Lexer class for assembly files.
+///
+/// Call Lex() to advance to the next token; the accessors below then describe
+/// that token until Lex() is called again.
+class AsmLexer {
+  SourceMgr &SrcMgr;
+
+  /// CurPtr - The next character to be read from CurBuf.
+  const char *CurPtr;
+  const MemoryBuffer *CurBuf;
+
+  // Information about the current token.
+  const char *TokStart;
+  asmtok::TokKind CurKind;
+  std::string CurStrVal;  // Valid for Identifier, Register and String.
+  int64_t CurIntVal;      // Valid for IntVal.
+
+  /// CurBuffer - This is the current buffer index we're lexing from as managed
+  /// by the SourceMgr object.
+  int CurBuffer;
+
+public:
+  AsmLexer(SourceMgr &SrcMgr);
+  ~AsmLexer() {}
+
+  /// Lex - Advance to the next token, remember its kind and return it.
+  asmtok::TokKind Lex() {
+    CurKind = LexToken();
+    return CurKind;
+  }
+
+  /// Queries about the kind of the current token.
+  asmtok::TokKind getKind() const { return CurKind; }
+  bool is(asmtok::TokKind K) const { return getKind() == K; }
+  bool isNot(asmtok::TokKind K) const { return !is(K); }
+
+  /// getCurStrVal - Spelling of the current token.  Only legal for tokens
+  /// that carry a string value.
+  const std::string &getCurStrVal() const {
+    assert((is(asmtok::Identifier) || is(asmtok::Register) ||
+            is(asmtok::String)) &&
+           "This token doesn't have a string value");
+    return CurStrVal;
+  }
+
+  /// getCurIntVal - Value of the current token.  Only legal for IntVal.
+  int64_t getCurIntVal() const {
+    assert(is(asmtok::IntVal) && "This token isn't an integer");
+    return CurIntVal;
+  }
+
+  /// getLoc - Location of the start of the current token.
+  SMLoc getLoc() const;
+
+  /// PrintMessage - Emit the diagnostic Msg at location Loc.
+  void PrintMessage(SMLoc Loc, const std::string &Msg) const;
+
+private:
+  int getNextChar();
+  asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
+
+  /// LexToken - Read the next token and return its code.
+  asmtok::TokKind LexToken();
+
+  // Helpers that finish a token once LexToken() has consumed its first
+  // character.
+  asmtok::TokKind LexIdentifier();
+  asmtok::TokKind LexPercent();
+  asmtok::TokKind LexSlash();
+  asmtok::TokKind LexHash();
+  asmtok::TokKind LexDigit();
+  asmtok::TokKind LexQuote();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
new file mode 100644
index 000000000000..715ff3932bc6
--- /dev/null
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -0,0 +1,351 @@
+//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool AsmParser::Error(SMLoc L, const char *Msg) {
+ Lexer.PrintMessage(L, Msg);
+ return true;
+}
+
+/// TokError - Print the diagnostic Msg at the location of the current token.
+/// Always returns true, mirroring Error().
+bool AsmParser::TokError(const char *Msg) {
+  return Error(Lexer.getLoc(), Msg);
+}
+
+/// Run - Parse statements until end of file.  Returns true as soon as a
+/// statement fails to parse, false if the whole input was accepted.
+bool AsmParser::Run() {
+  // The first Lex() primes the lexer; each ParseStatement() call leaves the
+  // lexer on the first token of the next statement.
+  for (Lexer.Lex(); !Lexer.is(asmtok::Eof); ) {
+    const bool Failed = ParseStatement();
+    if (Failed)
+      return true;
+  }
+
+  return false;
+}
+
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+/// Stops after consuming the end-of-statement token, or at end of file
+/// without consuming anything further.
+void AsmParser::EatToEndOfStatement() {
+  for (;;) {
+    if (Lexer.is(asmtok::Eof))
+      return;
+
+    // Remember whether this token ends the statement before eating it.
+    const bool AtEnd = Lexer.is(asmtok::EndOfStatement);
+    Lexer.Lex();
+    if (AtEnd)
+      return;
+  }
+}
+
+
+/// X86Operand - One parsed x86 operand.  Kind records which member of the
+/// union was filled in by the Create* helper that built the operand.
+struct AsmParser::X86Operand {
+  enum {
+    Register,
+    Immediate,
+    Memory
+  } Kind;
+
+  union {
+    struct {
+      unsigned RegNo;
+    } Reg;
+
+    struct {
+      int64_t Val;        // FIXME: Should be a general expression.
+    } Imm;
+
+    struct {
+      unsigned SegReg;
+      int64_t Disp;       // FIXME: Should be a general expression.
+      unsigned BaseReg;
+      unsigned Scale;
+      unsigned ScaleReg;
+    } Mem;
+  };
+
+  /// CreateReg - Build a Register operand for register number RegNo.
+  static X86Operand CreateReg(unsigned RegNo) {
+    X86Operand Result;
+    Result.Reg.RegNo = RegNo;
+    Result.Kind = Register;
+    return Result;
+  }
+
+  /// CreateImm - Build an Immediate operand with value Val.
+  static X86Operand CreateImm(int64_t Val) {
+    X86Operand Result;
+    Result.Imm.Val = Val;
+    Result.Kind = Immediate;
+    return Result;
+  }
+
+  /// CreateMem - Build a Memory operand from its five components.
+  static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg,
+                              unsigned Scale, unsigned ScaleReg) {
+    X86Operand Result;
+    Result.Mem.SegReg   = SegReg;
+    Result.Mem.Disp     = Disp;
+    Result.Mem.BaseReg  = BaseReg;
+    Result.Mem.Scale    = Scale;
+    Result.Mem.ScaleReg = ScaleReg;
+    Result.Kind = Memory;
+    return Result;
+  }
+};
+
+/// ParseX86Operand - Parse a single instruction operand into Op.
+///
+///   operand ::= register
+///   operand ::= '$' expr          (immediate)
+///   operand ::= '*' register
+///   operand ::= '*' memoperand
+///   operand ::= memoperand
+///
+/// Returns true on error (a diagnostic has been printed), false on success.
+bool AsmParser::ParseX86Operand(X86Operand &Op) {
+  switch (Lexer.getKind()) {
+  default:
+    return ParseX86MemOperand(Op);
+  case asmtok::Register:
+    // FIXME: Decode reg #.
+    // FIXME: if a segment register, this could either be just the seg reg, or
+    // the start of a memory operand.
+    Op = X86Operand::CreateReg(123);
+    Lexer.Lex(); // Eat register.
+    return false;
+  case asmtok::Dollar: {
+    // $42 -> immediate.
+    Lexer.Lex();
+    int64_t Val;
+    if (ParseExpression(Val))
+      return TokError("expected integer constant");
+    // The value after '$' is an immediate, not a register number.
+    Op = X86Operand::CreateImm(Val);
+    return false;
+  }
+  case asmtok::Star:
+    Lexer.Lex(); // Eat the star.
+
+    if (Lexer.is(asmtok::Register)) {
+      Op = X86Operand::CreateReg(123);
+      Lexer.Lex(); // Eat register.
+    } else if (ParseX86MemOperand(Op))
+      return true;
+
+    // FIXME: Note that these are 'dereferenced' so that clients know the '*' is
+    // there.
+    return false;
+  }
+}
+
+/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale)
+///
+/// Parses a memory operand into Op.  Every component is optional: a bare
+/// displacement expression is accepted, as is a parenthesized part with no
+/// displacement in front of it.  Returns true on error, false on success.
+bool AsmParser::ParseX86MemOperand(X86Operand &Op) {
+  // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
+  unsigned SegReg = 0;
+  int64_t Disp = 0;
+
+  // A leading '(' is ambiguous: it may open a parenthesized displacement
+  // expression "(4+5)" or the register part of an operand that has no
+  // displacement, "(%ebx)" or "(,%eax)".  Without lookahead the only way to
+  // tell is to eat the '(' and inspect the token behind it.
+  bool ParsedDisp = true;
+  if (Lexer.is(asmtok::LParen)) {
+    Lexer.Lex(); // Eat the '('.
+
+    if (Lexer.is(asmtok::Register) || Lexer.is(asmtok::Comma)) {
+      // The '(' belonged to the memory operand itself and is now consumed.
+      ParsedDisp = false;
+    } else if (ParseParenExpr(Disp)) {
+      // It had to be a parenthesized expression, and that failed to parse.
+      return true;
+    }
+  } else if (ParseExpression(Disp)) {
+    return true;
+  }
+
+  if (ParsedDisp) {
+    // A displacement may stand alone or be followed by the parenthesized
+    // register part.  If nothing follows, the operand is complete.
+    if (Lexer.isNot(asmtok::LParen)) {
+      Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0);
+      return false;
+    }
+
+    Lexer.Lex(); // Eat the '('.
+  }
+
+  // At this point the '(' of the memory operand has been eaten.  Process the
+  // rest of it.
+  unsigned BaseReg = 0, ScaleReg = 0, Scale = 0;
+
+  if (Lexer.is(asmtok::Register)) {
+    Lexer.Lex(); // eat the register.
+    BaseReg = 123; // FIXME: decode reg #
+  }
+
+  if (Lexer.is(asmtok::Comma)) {
+    Lexer.Lex(); // eat the comma.
+
+    if (Lexer.is(asmtok::Register)) {
+      Lexer.Lex(); // eat the register.
+      ScaleReg = 123; // FIXME: decode reg #
+      Scale = 1;      // If not specified, the scale defaults to 1.
+    }
+
+    if (Lexer.is(asmtok::Comma)) {
+      Lexer.Lex(); // eat the comma.
+
+      // If present, get and validate scale amount.
+      if (Lexer.is(asmtok::IntVal)) {
+        const int64_t ScaleVal = Lexer.getCurIntVal();
+        const bool ValidScale = ScaleVal == 1 || ScaleVal == 2 ||
+                                ScaleVal == 4 || ScaleVal == 8;
+        if (!ValidScale)
+          return TokError("scale factor in address must be 1, 2, 4 or 8");
+        Lexer.Lex(); // eat the scale.
+        Scale = (unsigned)ScaleVal;
+      }
+    }
+  }
+
+  // The operand must be closed by a ')'; check for it and eat it.
+  if (Lexer.isNot(asmtok::RParen))
+    return TokError("unexpected token in memory operand");
+  Lexer.Lex(); // Eat the ')'.
+
+  Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg);
+  return false;
+}
+
+/// ParseParenExpr - Parse the remainder of a parenthesized expression and
+/// store its value in Res.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+/// Returns true on error, false once the closing ')' has been eaten.
+bool AsmParser::ParseParenExpr(int64_t &Res) {
+  if (ParseExpression(Res))
+    return true;
+
+  if (Lexer.is(asmtok::RParen)) {
+    Lexer.Lex(); // Eat the ')'.
+    return false;
+  }
+
+  return TokError("expected ')' in parentheses expression");
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+///  primaryexpr ::= (parenexpr
+///  primaryexpr ::= symbol
+///  primaryexpr ::= number
+///  primaryexpr ::= ~,+,- primaryexpr
+///
+/// On success the value is stored in Res and false is returned.  On error a
+/// diagnostic is printed and true is returned.
+bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
+  switch (Lexer.getKind()) {
+  default:
+    return TokError("unknown token in expression");
+  case asmtok::Identifier:
+    // This is a label, this should be parsed as part of an expression, to
+    // handle things like LFOO+4
+    Res = 0; // FIXME.
+    Lexer.Lex(); // Eat identifier.
+    return false;
+  case asmtok::IntVal:
+    Res = Lexer.getCurIntVal();
+    Lexer.Lex(); // Eat integer.
+    return false;
+  case asmtok::LParen:
+    Lexer.Lex(); // Eat the '('.
+    return ParseParenExpr(Res);
+  case asmtok::Plus:
+    Lexer.Lex(); // Eat the operator.
+    return ParsePrimaryExpr(Res);   // Unary plus leaves the value unchanged.
+  case asmtok::Minus:
+    Lexer.Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res))
+      return true;
+    // Negate in unsigned arithmetic so that negating the most negative
+    // int64_t value wraps instead of overflowing.
+    Res = (int64_t)(0 - (uint64_t)Res);
+    return false;
+  case asmtok::Tilde:
+    Lexer.Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res))
+      return true;
+    Res = ~Res;
+    return false;
+  }
+}
+
+/// ParseExpression - Parse an expression and return it.
+///
+///  expr ::= expr +,- expr          -> lowest.
+///  expr ::= expr |,^,&,! expr      -> middle.
+///  expr ::= expr *,/,%,<<,>> expr  -> highest.
+///  expr ::= primaryexpr
+///
+/// Only the primaryexpr production is handled at present; the binary
+/// operators listed above are not parsed yet.
+bool AsmParser::ParseExpression(int64_t &Res) {
+  const bool Failed = ParsePrimaryExpr(Res);
+  return Failed;
+}
+
+
+
+
+/// ParseStatement:
+///   ::= EndOfStatement
+///   ::= Label* Directive ...Operands... EndOfStatement
+///   ::= Label* Identifier OperandList* EndOfStatement
+///
+/// Returns true on error, false when a statement was consumed.  Directives
+/// (identifiers starting with '.') are skipped with a warning for now.
+bool AsmParser::ParseStatement() {
+  // An empty statement is just its terminator.
+  if (Lexer.is(asmtok::EndOfStatement)) {
+    Lexer.Lex();
+    return false;
+  }
+
+  // Anything else has to start with an identifier.
+  // TODO: Recurse on local labels etc.
+  if (Lexer.isNot(asmtok::Identifier))
+    return TokError("unexpected token at start of statement");
+
+  // Remember the identifier; it is the key symbol of the statement.
+  SMLoc IDLoc = Lexer.getLoc();
+  std::string IDVal = Lexer.getCurStrVal();
+
+  // Consume the identifier and look at the token behind it.
+  const asmtok::TokKind Next = Lexer.Lex();
+  if (Next == asmtok::Colon) {
+    // identifier ':' -> Label.  Eat the colon and parse what follows it as
+    // a statement of its own.
+    Lexer.Lex();
+    return ParseStatement();
+  }
+
+  // Otherwise, we have a normal instruction or directive.
+  if (IDVal[0] == '.') {
+    Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
+    EatToEndOfStatement();
+    return false;
+  }
+
+  // It is an instruction: collect its comma-separated operand list, which
+  // may be empty.  Note that we require a newline at the end of file, so we
+  // don't have to worry about Eof here.
+  std::vector<X86Operand> Operands;
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    for (;;) {
+      X86Operand Op;
+      if (ParseX86Operand(Op))
+        return true;
+      Operands.push_back(Op);
+
+      if (Lexer.isNot(asmtok::Comma))
+        break;
+      Lexer.Lex(); // Eat the comma.
+    }
+  }
+
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in operand list");
+
+  // Eat the end of statement marker.
+  Lexer.Lex();
+
+  // Instruction is good, process it.
+  outs() << "Found instruction: " << IDVal << " with " << Operands.size()
+         << " operands.\n";
+
+  return false;
+}
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
new file mode 100644
index 000000000000..82eb433b61ef
--- /dev/null
+++ b/tools/llvm-mc/AsmParser.h
@@ -0,0 +1,48 @@
+//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMPARSER_H
+#define ASMPARSER_H
+
+#include "AsmLexer.h"
+
+namespace llvm {
+
+/// AsmParser - Parser for assembly files, driving an AsmLexer over the main
+/// buffer of a SourceMgr.
+class AsmParser {
+  AsmLexer Lexer;
+  struct X86Operand;
+
+public:
+  AsmParser(SourceMgr &SM) : Lexer(SM) {}
+  ~AsmParser() {}
+
+  /// Run - Parse the whole input.  Returns true if an error occurred.
+  bool Run();
+
+private:
+  /// ParseStatement - Parse one statement.  Returns true on error.
+  bool ParseStatement();
+
+  // Diagnostic helpers.  Both print Msg and return true.
+  bool Error(SMLoc L, const char *Msg);
+  bool TokError(const char *Msg);
+
+  /// EatToEndOfStatement - Skip the remaining tokens of this statement.
+  void EatToEndOfStatement();
+
+  // Operand and expression parsing.  Each returns true on error.
+  bool ParseX86Operand(X86Operand &Op);
+  bool ParseX86MemOperand(X86Operand &Op);
+  bool ParseExpression(int64_t &Res);
+  bool ParsePrimaryExpr(int64_t &Res);
+  bool ParseParenExpr(int64_t &Res);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
new file mode 100644
index 000000000000..d8195e7418c3
--- /dev/null
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS support)  # The only LLVM component listed for linking.
+
+add_llvm_tool(llvm-mc
+ llvm-mc.cpp
+ AsmLexer.cpp
+ AsmParser.cpp
+ )
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
new file mode 100644
index 000000000000..7b4d94445656
--- /dev/null
+++ b/tools/llvm-mc/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-mc/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = llvm-mc
+LINK_COMPONENTS := support
+
+# This tool has no plugins, so optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
new file mode 100644
index 000000000000..52205c48d0fb
--- /dev/null
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -0,0 +1,161 @@
+//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility is a simple driver that allows command line hacking on machine
+// code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+#include "AsmParser.h"
+using namespace llvm;
+
+// Positional argument: the file to read.  Defaults to "-".
+static cl::opt<std::string> InputFilename(cl::Positional,
+                                          cl::desc("<input file>"),
+                                          cl::init("-"));
+
+// -o <filename>
+static cl::opt<std::string> OutputFilename("o",
+                                           cl::desc("Output filename"),
+                                           cl::value_desc("filename"));
+
+// -I<directory>, may be given several times.
+static cl::list<std::string> IncludeDirs("I",
+                                         cl::desc("Directory of include files"),
+                                         cl::value_desc("directory"),
+                                         cl::Prefix);
+
+/// ActionType - What the tool should do with its input.
+enum ActionType {
+  AC_AsLex,
+  AC_Assemble
+};
+
+// -as-lex / -assemble; assembling is the default.
+static cl::opt<ActionType> Action(
+    cl::desc("Action to perform:"),
+    cl::init(AC_Assemble),
+    cl::values(clEnumValN(AC_AsLex, "as-lex",
+                          "Lex tokens from a .s file"),
+               clEnumValN(AC_Assemble, "assemble",
+                          "Assemble a .s file (default)"),
+               clEnumValEnd));
+
+/// AsLexInput - Implements the -as-lex action: lex the input file and print
+/// one line per token to outs().  ProgName prefixes the message printed when
+/// the input cannot be read.  Returns nonzero if the input could not be read
+/// or any token was an error or was unknown to this driver.
+static int AsLexInput(const char *ProgName) {
+  std::string ErrorMessage;
+  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
+                                                      &ErrorMessage);
+  if (!Buffer) {
+    errs() << ProgName << ": ";
+    if (ErrorMessage.empty())
+      errs() << "input file didn't read correctly.\n";
+    else
+      errs() << ErrorMessage << "\n";
+    return 1;
+  }
+
+  SourceMgr SrcMgr;
+
+  // Register the buffer with SrcMgr; the lexer reads it from there.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  // Record the location of the include directories so that the lexer can find
+  // it later.
+  SrcMgr.setIncludeDirs(IncludeDirs);
+
+  AsmLexer Lexer(SrcMgr);
+
+  bool HadError = false;
+
+  for (asmtok::TokKind Tok = Lexer.Lex(); Tok != asmtok::Eof;
+       Tok = Lexer.Lex()) {
+    switch (Tok) {
+    default:
+      Lexer.PrintMessage(Lexer.getLoc(), "driver: unknown token");
+      HadError = true;
+      break;
+    case asmtok::Error:
+      HadError = true; // error already printed.
+      break;
+
+    // Tokens that carry a value.
+    case asmtok::Identifier:
+      outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
+      break;
+    case asmtok::Register:
+      outs() << "register: " << Lexer.getCurStrVal() << '\n';
+      break;
+    case asmtok::String:
+      outs() << "string: " << Lexer.getCurStrVal() << '\n';
+      break;
+    case asmtok::IntVal:
+      outs() << "int: " << Lexer.getCurIntVal() << '\n';
+      break;
+
+    // Tokens without a value: print just the kind.
+    case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break;
+    case asmtok::Colon:          outs() << "Colon\n"; break;
+    case asmtok::Plus:           outs() << "Plus\n"; break;
+    case asmtok::Minus:          outs() << "Minus\n"; break;
+    case asmtok::Tilde:          outs() << "Tilde\n"; break;
+    case asmtok::Slash:          outs() << "Slash\n"; break;
+    case asmtok::LParen:         outs() << "LParen\n"; break;
+    case asmtok::RParen:         outs() << "RParen\n"; break;
+    case asmtok::Star:           outs() << "Star\n"; break;
+    case asmtok::Comma:          outs() << "Comma\n"; break;
+    case asmtok::Dollar:         outs() << "Dollar\n"; break;
+    }
+  }
+
+  return HadError ? 1 : 0;
+}
+
+/// AssembleInput - Implements the -assemble action: run AsmParser over the
+/// input file.  ProgName prefixes the message printed when the input cannot
+/// be read.  Returns nonzero if the input could not be read or failed to
+/// parse.
+static int AssembleInput(const char *ProgName) {
+  std::string ErrorMessage;
+  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
+                                                      &ErrorMessage);
+  if (!Buffer) {
+    errs() << ProgName << ": ";
+    if (ErrorMessage.empty())
+      errs() << "input file didn't read correctly.\n";
+    else
+      errs() << ErrorMessage << "\n";
+    return 1;
+  }
+
+  SourceMgr SrcMgr;
+
+  // Register the buffer with SrcMgr; the parser's lexer reads it from there.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  // Record the location of the include directories so that the lexer can find
+  // it later.
+  SrcMgr.setIncludeDirs(IncludeDirs);
+
+  AsmParser Parser(SrcMgr);
+  const bool Failed = Parser.Run();
+  return Failed;
+}
+
+
+/// main - Parse the command line and dispatch to the selected action.
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+  cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
+
+  const char *ProgName = argv[0];
+
+  if (Action == AC_Assemble)
+    return AssembleInput(ProgName);
+
+  // AC_AsLex, and any other value, lexes the input.
+  return AsLexInput(ProgName);
+}
+