diff options
Diffstat (limited to 'tools/llvm-mc/AsmLexer.cpp')
-rw-r--r-- | tools/llvm-mc/AsmLexer.cpp | 258 |
1 files changed, 258 insertions, 0 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp new file mode 100644 index 000000000000..0828594a35b5 --- /dev/null +++ b/tools/llvm-mc/AsmLexer.cpp @@ -0,0 +1,258 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "AsmLexer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cerrno> +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + TokStart = 0; +} + +SMLoc AsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const { + SrcMgr.PrintMessage(Loc, Msg); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return asmtok::Error. +asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg); + return asmtok::Error; +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); + return getNextChar(); + } + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +asmtok::TokKind AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' || *CurPtr == '@') + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Include % + return asmtok::Identifier; +} + +/// LexPercent: Register: %[a-zA-Z0-9]+ +asmtok::TokKind AsmLexer::LexPercent() { + if (!isalnum(*CurPtr)) + return ReturnError(TokStart, "invalid register name"); + while (isalnum(*CurPtr)) + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Skip % + return asmtok::Register; +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +asmtok::TokKind AsmLexer::LexSlash() { + if (*CurPtr != '*') + return asmtok::Slash; + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexHash: Comment: #[^\n]* +asmtok::TokKind AsmLexer::LexHash() { + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return asmtok::Eof; + return asmtok::EndOfStatement; +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +asmtok::TokKind AsmLexer::LexDigit() { + if (*CurPtr == ':') + return ReturnError(TokStart, "FIXME: local label not implemented"); + if (*CurPtr == 'f' || *CurPtr == 'b') + return ReturnError(TokStart, "FIXME: directional label not implemented"); + + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 10); + return asmtok::IntVal; + } + + if (*CurPtr == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return asmtok::IntVal; + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(CurPtr-2, "Hexadecimal number out of range"); + } + return asmtok::IntVal; + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 8); + return asmtok::IntVal; +} + +/// LexQuote: String: "..." +asmtok::TokKind AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + CurStrVal.assign(TokStart, CurPtr); // include quotes. + return asmtok::String; +} + + +asmtok::TokKind AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return asmtok::Eof; + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return asmtok::EndOfStatement; + case ':': return asmtok::Colon; + case '+': return asmtok::Plus; + case '-': return asmtok::Minus; + case '~': return asmtok::Tilde; + case '(': return asmtok::LParen; + case ')': return asmtok::RParen; + case '*': return asmtok::Star; + case ',': return asmtok::Comma; + case '$': return asmtok::Dollar; + case '%': return LexPercent(); + case '/': return LexSlash(); + case '#': return LexHash(); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} |