diff options
Diffstat (limited to 'tools/llvm-mc')
-rw-r--r-- | tools/llvm-mc/AsmLexer.cpp | 258 | ||||
-rw-r--r-- | tools/llvm-mc/AsmLexer.h | 109 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.cpp | 351 | ||||
-rw-r--r-- | tools/llvm-mc/AsmParser.h | 48 | ||||
-rw-r--r-- | tools/llvm-mc/CMakeLists.txt | 7 | ||||
-rw-r--r-- | tools/llvm-mc/Makefile | 17 | ||||
-rw-r--r-- | tools/llvm-mc/llvm-mc.cpp | 161 |
7 files changed, 951 insertions, 0 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp new file mode 100644 index 000000000000..0828594a35b5 --- /dev/null +++ b/tools/llvm-mc/AsmLexer.cpp @@ -0,0 +1,258 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "AsmLexer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cerrno> +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + TokStart = 0; +} + +SMLoc AsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const { + SrcMgr.PrintMessage(Loc, Msg); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return asmtok::Error. +asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg); + return asmtok::Error; +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); + return getNextChar(); + } + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +asmtok::TokKind AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' || *CurPtr == '@') + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Include % + return asmtok::Identifier; +} + +/// LexPercent: Register: %[a-zA-Z0-9]+ +asmtok::TokKind AsmLexer::LexPercent() { + if (!isalnum(*CurPtr)) + return ReturnError(TokStart, "invalid register name"); + while (isalnum(*CurPtr)) + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Skip % + return asmtok::Register; +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +asmtok::TokKind AsmLexer::LexSlash() { + if (*CurPtr != '*') + return asmtok::Slash; + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexHash: Comment: #[^\n]* +asmtok::TokKind AsmLexer::LexHash() { + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return asmtok::Eof; + return asmtok::EndOfStatement; +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +asmtok::TokKind AsmLexer::LexDigit() { + if (*CurPtr == ':') + return ReturnError(TokStart, "FIXME: local label not implemented"); + if (*CurPtr == 'f' || *CurPtr == 'b') + return ReturnError(TokStart, "FIXME: directional label not implemented"); + + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 10); + return asmtok::IntVal; + } + + if (*CurPtr == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return asmtok::IntVal; + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(CurPtr-2, "Hexadecimal number out of range"); + } + return asmtok::IntVal; + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 8); + return asmtok::IntVal; +} + +/// LexQuote: String: "..." +asmtok::TokKind AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + CurStrVal.assign(TokStart, CurPtr); // include quotes. + return asmtok::String; +} + + +asmtok::TokKind AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return asmtok::Eof; + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return asmtok::EndOfStatement; + case ':': return asmtok::Colon; + case '+': return asmtok::Plus; + case '-': return asmtok::Minus; + case '~': return asmtok::Tilde; + case '(': return asmtok::LParen; + case ')': return asmtok::RParen; + case '*': return asmtok::Star; + case ',': return asmtok::Comma; + case '$': return asmtok::Dollar; + case '%': return LexPercent(); + case '/': return LexSlash(); + case '#': return LexHash(); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h new file mode 100644 index 000000000000..a6c93230c6cd --- /dev/null +++ b/tools/llvm-mc/AsmLexer.h @@ -0,0 +1,109 @@ +//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMLEXER_H +#define ASMLEXER_H + +#include "llvm/Support/DataTypes.h" +#include <string> +#include <cassert> + +namespace llvm { +class MemoryBuffer; +class SourceMgr; +class SMLoc; + +namespace asmtok { + enum TokKind { + // Markers + Eof, Error, + + // String values. + Identifier, + Register, + String, + + // Integer values. + IntVal, + + // No-value. + EndOfStatement, + Colon, + Plus, Minus, Tilde, + Slash, // '/' + LParen, RParen, + Star, Comma, Dollar + }; +} + +/// AsmLexer - Lexer class for assembly files. +class AsmLexer { + SourceMgr &SrcMgr; + + const char *CurPtr; + const MemoryBuffer *CurBuf; + + // Information about the current token. + const char *TokStart; + asmtok::TokKind CurKind; + std::string CurStrVal; // This is valid for Identifier. + int64_t CurIntVal; + + /// CurBuffer - This is the current buffer index we're lexing from as managed + /// by the SourceMgr object. + int CurBuffer; + +public: + AsmLexer(SourceMgr &SrcMgr); + ~AsmLexer() {} + + asmtok::TokKind Lex() { + return CurKind = LexToken(); + } + + asmtok::TokKind getKind() const { return CurKind; } + bool is(asmtok::TokKind K) const { return CurKind == K; } + bool isNot(asmtok::TokKind K) const { return CurKind != K; } + + const std::string &getCurStrVal() const { + assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register || + CurKind == asmtok::String) && + "This token doesn't have a string value"); + return CurStrVal; + } + int64_t getCurIntVal() const { + assert(CurKind == asmtok::IntVal && "This token isn't an integer"); + return CurIntVal; + } + + SMLoc getLoc() const; + + void PrintMessage(SMLoc Loc, const std::string &Msg) const; + +private: + int getNextChar(); + asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg); + + /// LexToken - Read the next token and return its code. + asmtok::TokKind LexToken(); + asmtok::TokKind LexIdentifier(); + asmtok::TokKind LexPercent(); + asmtok::TokKind LexSlash(); + asmtok::TokKind LexHash(); + asmtok::TokKind LexDigit(); + asmtok::TokKind LexQuote(); +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp new file mode 100644 index 000000000000..715ff3932bc6 --- /dev/null +++ b/tools/llvm-mc/AsmParser.cpp @@ -0,0 +1,351 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "AsmParser.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool AsmParser::Error(SMLoc L, const char *Msg) { + Lexer.PrintMessage(L, Msg); + return true; +} + +bool AsmParser::TokError(const char *Msg) { + Lexer.PrintMessage(Lexer.getLoc(), Msg); + return true; +} + +bool AsmParser::Run() { + // Prime the lexer. + Lexer.Lex(); + + while (Lexer.isNot(asmtok::Eof)) + if (ParseStatement()) + return true; + + return false; +} + +/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. +void AsmParser::EatToEndOfStatement() { + while (Lexer.isNot(asmtok::EndOfStatement) && + Lexer.isNot(asmtok::Eof)) + Lexer.Lex(); + + // Eat EOL. + if (Lexer.is(asmtok::EndOfStatement)) + Lexer.Lex(); +} + + +struct AsmParser::X86Operand { + enum { + Register, + Immediate, + Memory + } Kind; + + union { + struct { + unsigned RegNo; + } Reg; + + struct { + // FIXME: Should be a general expression. + int64_t Val; + } Imm; + + struct { + unsigned SegReg; + int64_t Disp; // FIXME: Should be a general expression. + unsigned BaseReg; + unsigned Scale; + unsigned ScaleReg; + } Mem; + }; + + static X86Operand CreateReg(unsigned RegNo) { + X86Operand Res; + Res.Kind = Register; + Res.Reg.RegNo = RegNo; + return Res; + } + static X86Operand CreateImm(int64_t Val) { + X86Operand Res; + Res.Kind = Immediate; + Res.Imm.Val = Val; + return Res; + } + static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg, + unsigned Scale, unsigned ScaleReg) { + X86Operand Res; + Res.Kind = Memory; + Res.Mem.SegReg = SegReg; + Res.Mem.Disp = Disp; + Res.Mem.BaseReg = BaseReg; + Res.Mem.Scale = Scale; + Res.Mem.ScaleReg = ScaleReg; + return Res; + } +}; + +bool AsmParser::ParseX86Operand(X86Operand &Op) { + switch (Lexer.getKind()) { + default: + return ParseX86MemOperand(Op); + case asmtok::Register: + // FIXME: Decode reg #. + // FIXME: if a segment register, this could either be just the seg reg, or + // the start of a memory operand. + Op = X86Operand::CreateReg(123); + Lexer.Lex(); // Eat register. + return false; + case asmtok::Dollar: { + // $42 -> immediate. + Lexer.Lex(); + int64_t Val; + if (ParseExpression(Val)) + return TokError("expected integer constant"); + Op = X86Operand::CreateReg(Val); + return false; + case asmtok::Star: + Lexer.Lex(); // Eat the star. + + if (Lexer.is(asmtok::Register)) { + Op = X86Operand::CreateReg(123); + Lexer.Lex(); // Eat register. + } else if (ParseX86MemOperand(Op)) + return true; + + // FIXME: Note that these are 'dereferenced' so that clients know the '*' is + // there. + return false; + } + } +} + +/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale) +bool AsmParser::ParseX86MemOperand(X86Operand &Op) { + // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. + unsigned SegReg = 0; + + + // We have to disambiguate a parenthesized expression "(4+5)" from the start + // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The + // only way to do this without lookahead is to eat the ( and see what is after + // it. + int64_t Disp = 0; + if (Lexer.isNot(asmtok::LParen)) { + if (ParseExpression(Disp)) return true; + + // After parsing the base expression we could either have a parenthesized + // memory address or not. If not, return now. If so, eat the (. + if (Lexer.isNot(asmtok::LParen)) { + Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0); + return false; + } + + // Eat the '('. + Lexer.Lex(); + } else { + // Okay, we have a '('. We don't know if this is an expression or not, but + // so we have to eat the ( to see beyond it. + Lexer.Lex(); // Eat the '('. + + if (Lexer.is(asmtok::Register) || Lexer.is(asmtok::Comma)) { + // Nothing to do here, fall into the code below with the '(' part of the + // memory operand consumed. + } else { + // It must be an parenthesized expression, parse it now. + if (ParseParenExpr(Disp)) return true; + + // After parsing the base expression we could either have a parenthesized + // memory address or not. If not, return now. If so, eat the (. + if (Lexer.isNot(asmtok::LParen)) { + Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0); + return false; + } + + // Eat the '('. + Lexer.Lex(); + } + } + + // If we reached here, then we just ate the ( of the memory operand. Process + // the rest of the memory operand. + unsigned BaseReg = 0, ScaleReg = 0, Scale = 0; + + if (Lexer.is(asmtok::Register)) { + BaseReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + if (Lexer.is(asmtok::Register)) { + ScaleReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + Scale = 1; // If not specified, the scale defaults to 1. + } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + // If present, get and validate scale amount. + if (Lexer.is(asmtok::IntVal)) { + int64_t ScaleVal = Lexer.getCurIntVal(); + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) + return TokError("scale factor in address must be 1, 2, 4 or 8"); + Lexer.Lex(); // eat the scale. + Scale = (unsigned)ScaleVal; + } + } + } + + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. + if (Lexer.isNot(asmtok::RParen)) + return TokError("unexpected token in memory operand"); + Lexer.Lex(); // Eat the ')'. + + Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg); + return false; +} + +/// ParseParenExpr - Parse a paren expression and return it. +/// NOTE: This assumes the leading '(' has already been consumed. +/// +/// parenexpr ::= expr) +/// +bool AsmParser::ParseParenExpr(int64_t &Res) { + if (ParseExpression(Res)) return true; + if (Lexer.isNot(asmtok::RParen)) + return TokError("expected ')' in parentheses expression"); + Lexer.Lex(); + return false; +} + +/// ParsePrimaryExpr - Parse a primary expression and return it. +/// primaryexpr ::= (parenexpr +/// primaryexpr ::= symbol +/// primaryexpr ::= number +/// primaryexpr ::= ~,+,- primaryexpr +bool AsmParser::ParsePrimaryExpr(int64_t &Res) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case asmtok::Identifier: + // This is a label, this should be parsed as part of an expression, to + // handle things like LFOO+4 + Res = 0; // FIXME. + Lexer.Lex(); // Eat identifier. + return false; + case asmtok::IntVal: + Res = Lexer.getCurIntVal(); + Lexer.Lex(); // Eat identifier. + return false; + case asmtok::LParen: + Lexer.Lex(); // Eat the '('. + return ParseParenExpr(Res); + case asmtok::Tilde: + case asmtok::Plus: + case asmtok::Minus: + Lexer.Lex(); // Eat the operator. + return ParsePrimaryExpr(Res); + } +} + +/// ParseExpression - Parse an expression and return it. +/// +/// expr ::= expr +,- expr -> lowest. +/// expr ::= expr |,^,&,! expr -> middle. +/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= primaryexpr +/// +bool AsmParser::ParseExpression(int64_t &Res) { + return ParsePrimaryExpr(Res); +} + + + + +/// ParseStatement: +/// ::= EndOfStatement +/// ::= Label* Directive ...Operands... EndOfStatement +/// ::= Label* Identifier OperandList* EndOfStatement +bool AsmParser::ParseStatement() { + switch (Lexer.getKind()) { + default: + return TokError("unexpected token at start of statement"); + case asmtok::EndOfStatement: + Lexer.Lex(); + return false; + case asmtok::Identifier: + break; + // TODO: Recurse on local labels etc. + } + + // If we have an identifier, handle it as the key symbol. + SMLoc IDLoc = Lexer.getLoc(); + std::string IDVal = Lexer.getCurStrVal(); + + // Consume the identifier, see what is after it. + if (Lexer.Lex() == asmtok::Colon) { + // identifier ':' -> Label. + Lexer.Lex(); + return ParseStatement(); + } + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + // If it's an instruction, parse an operand list. + std::vector<X86Operand> Operands; + + // Read the first operand, if present. Note that we require a newline at the + // end of file, so we don't have to worry about Eof here. + if (Lexer.isNot(asmtok::EndOfStatement)) { + X86Operand Op; + if (ParseX86Operand(Op)) + return true; + Operands.push_back(Op); + } + + while (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // Eat the comma. + + // Parse and remember the operand. + X86Operand Op; + if (ParseX86Operand(Op)) + return true; + Operands.push_back(Op); + } + + if (Lexer.isNot(asmtok::EndOfStatement)) + return TokError("unexpected token in operand list"); + + // Eat the end of statement marker. + Lexer.Lex(); + + // Instruction is good, process it. + outs() << "Found instruction: " << IDVal << " with " << Operands.size() + << " operands.\n"; + + // Skip to end of line for now. + return false; +} diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h new file mode 100644 index 000000000000..82eb433b61ef --- /dev/null +++ b/tools/llvm-mc/AsmParser.h @@ -0,0 +1,48 @@ +//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMPARSER_H +#define ASMPARSER_H + +#include "AsmLexer.h" + +namespace llvm { + +class AsmParser { + AsmLexer Lexer; + struct X86Operand; + +public: + AsmParser(SourceMgr &SM) : Lexer(SM) {} + ~AsmParser() {} + + bool Run(); + +private: + bool ParseStatement(); + + bool Error(SMLoc L, const char *Msg); + bool TokError(const char *Msg); + + void EatToEndOfStatement(); + + bool ParseX86Operand(X86Operand &Op); + bool ParseX86MemOperand(X86Operand &Op); + bool ParseExpression(int64_t &Res); + bool ParsePrimaryExpr(int64_t &Res); + bool ParseParenExpr(int64_t &Res); +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt new file mode 100644 index 000000000000..d8195e7418c3 --- /dev/null +++ b/tools/llvm-mc/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS support) + +add_llvm_tool(llvm-mc + llvm-mc.cpp + AsmLexer.cpp + AsmParser.cpp + ) diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile new file mode 100644 index 000000000000..7b4d94445656 --- /dev/null +++ b/tools/llvm-mc/Makefile @@ -0,0 +1,17 @@ +##===- tools/llvm-mc/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = llvm-mc +LINK_COMPONENTS := support + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp new file mode 100644 index 000000000000..52205c48d0fb --- /dev/null +++ b/tools/llvm-mc/llvm-mc.cpp @@ -0,0 +1,161 @@ +//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This utility is a simple driver that allows command line hacking on machine +// code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Signals.h" +#include "AsmParser.h" +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename")); + +static cl::list<std::string> +IncludeDirs("I", cl::desc("Directory of include files"), + cl::value_desc("directory"), cl::Prefix); + +enum ActionType { + AC_AsLex, + AC_Assemble +}; + +static cl::opt<ActionType> +Action(cl::desc("Action to perform:"), + cl::init(AC_Assemble), + cl::values(clEnumValN(AC_AsLex, "as-lex", + "Lex tokens from a .s file"), + clEnumValN(AC_Assemble, "assemble", + "Assemble a .s file (default)"), + clEnumValEnd)); + +static int AsLexInput(const char *ProgName) { + std::string ErrorMessage; + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, + &ErrorMessage); + if (Buffer == 0) { + errs() << ProgName << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "input file didn't read correctly.\n"; + return 1; + } + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what TGParser will pick up. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + AsmLexer Lexer(SrcMgr); + + bool Error = false; + + asmtok::TokKind Tok = Lexer.Lex(); + while (Tok != asmtok::Eof) { + switch (Tok) { + default: + Lexer.PrintMessage(Lexer.getLoc(), "driver: unknown token"); + Error = true; + break; + case asmtok::Error: + Error = true; // error already printed. + break; + case asmtok::Identifier: + outs() << "identifier: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::Register: + outs() << "register: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::String: + outs() << "string: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::IntVal: + outs() << "int: " << Lexer.getCurIntVal() << '\n'; + break; + case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break; + case asmtok::Colon: outs() << "Colon\n"; break; + case asmtok::Plus: outs() << "Plus\n"; break; + case asmtok::Minus: outs() << "Minus\n"; break; + case asmtok::Tilde: outs() << "Tilde\n"; break; + case asmtok::Slash: outs() << "Slash\n"; break; + case asmtok::LParen: outs() << "LParen\n"; break; + case asmtok::RParen: outs() << "RParen\n"; break; + case asmtok::Star: outs() << "Star\n"; break; + case asmtok::Comma: outs() << "Comma\n"; break; + case asmtok::Dollar: outs() << "Dollar\n"; break; + } + + Tok = Lexer.Lex(); + } + + return Error; +} + +static int AssembleInput(const char *ProgName) { + std::string ErrorMessage; + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, + &ErrorMessage); + if (Buffer == 0) { + errs() << ProgName << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "input file didn't read correctly.\n"; + return 1; + } + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what TGParser will pick up. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + AsmParser Parser(SrcMgr); + return Parser.Run(); +} + + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + + switch (Action) { + default: + case AC_AsLex: + return AsLexInput(argv[0]); + case AC_Assemble: + return AssembleInput(argv[0]); + } + + return 0; +} + |