diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp | 174 |
1 files changed, 150 insertions, 24 deletions
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp index 5a571c7c0c0e..1fa22ab000f0 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SaveAndRestore.h" #include <cassert> @@ -63,6 +64,12 @@ int AsmLexer::getNextChar() { return (unsigned char)*CurPtr++; } +int AsmLexer::peekNextChar() { + if (CurPtr == CurBuf.end()) + return EOF; + return (unsigned char)*CurPtr; +} + /// The leading integral digit sequence and dot should have already been /// consumed, some or all of the fractional digit sequence *can* have been /// consumed. @@ -271,13 +278,34 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, return DefaultRadix; } -static AsmToken intToken(StringRef Ref, APInt &Value) -{ +static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) { + while (hexDigitValue(*CurPtr) < DefaultRadix) { + ++CurPtr; + } + return CurPtr; +} + +static AsmToken intToken(StringRef Ref, APInt &Value) { if (Value.isIntN(64)) return AsmToken(AsmToken::Integer, Ref, Value); return AsmToken(AsmToken::BigNum, Ref, Value); } +static std::string radixName(unsigned Radix) { + switch (Radix) { + case 2: + return "binary"; + case 8: + return "octal"; + case 10: + return "decimal"; + case 16: + return "hexadecimal"; + default: + return "base-" + std::to_string(Radix); + } +} + /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] /// Forward/Backward Label: [0-9][fb] @@ -286,16 +314,51 @@ static AsmToken intToken(StringRef Ref, APInt &Value) /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] /// Decimal integer: [1-9][0-9]* AsmToken AsmLexer::LexDigit() { - // MASM-flavor binary integer: [01]+[bB] + // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY]) + // MASM-flavor octal integer: [0-7]+[oOqQ] + // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT]) // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] if (LexMasmIntegers && isdigit(CurPtr[-1])) { - const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? - CurPtr - 1 : nullptr; + const char *FirstNonBinary = + (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; + const char *FirstNonDecimal = + (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr; const char *OldCurPtr = CurPtr; while (isHexDigit(*CurPtr)) { - if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) - FirstNonBinary = CurPtr; + switch (*CurPtr) { + default: + if (!FirstNonDecimal) { + FirstNonDecimal = CurPtr; + } + LLVM_FALLTHROUGH; + case '9': + case '8': + case '7': + case '6': + case '5': + case '4': + case '3': + case '2': + if (!FirstNonBinary) { + FirstNonBinary = CurPtr; + } + break; + case '1': + case '0': + break; + } + ++CurPtr; + } + if (*CurPtr == '.') { + // MASM float literals (other than hex floats) always contain a ".", and + // are always written in decimal. + ++CurPtr; + return LexFloatLiteral(); + } + + if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) { ++CurPtr; + return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); } unsigned Radix = 0; @@ -303,28 +366,61 @@ AsmToken AsmLexer::LexDigit() { // hexadecimal number ++CurPtr; Radix = 16; + } else if (*CurPtr == 't' || *CurPtr == 'T') { + // decimal number + ++CurPtr; + Radix = 10; + } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' || + *CurPtr == 'Q') { + // octal number + ++CurPtr; + Radix = 8; + } else if (*CurPtr == 'y' || *CurPtr == 'Y') { + // binary number + ++CurPtr; + Radix = 2; + } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr && + DefaultRadix < 14 && + (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) { + Radix = 10; } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && - (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) + DefaultRadix < 12 && + (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) { Radix = 2; + } - if (Radix == 2 || Radix == 16) { + if (Radix) { StringRef Result(TokStart, CurPtr - TokStart); APInt Value(128, 0, true); if (Result.drop_back().getAsInteger(Radix, Value)) - return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : - "invalid hexdecimal number"); + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); // MSVC accepts and ignores type suffices on integer literals. SkipIgnoredIntegerSuffix(CurPtr); return intToken(Result, Value); - } + } - // octal/decimal integers, or floating point numbers, fall through + // default-radix integers, or floating point numbers, fall through CurPtr = OldCurPtr; } + // MASM default-radix integers: [0-9a-fA-F]+ + // (All other integer literals have a radix specifier.) + if (LexMasmIntegers && UseMasmDefaultRadix) { + CurPtr = findLastDigit(CurPtr, 16); + StringRef Result(TokStart, CurPtr - TokStart); + + APInt Value(128, 0, true); + if (Result.getAsInteger(DefaultRadix, Value)) { + return ReturnError(TokStart, + "invalid " + radixName(DefaultRadix) + " number"); + } + + return intToken(Result, Value); + } + // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); @@ -339,13 +435,9 @@ AsmToken AsmLexer::LexDigit() { StringRef Result(TokStart, CurPtr - TokStart); APInt Value(128, 0, true); - if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid decimal number" : - "invalid hexdecimal number"); - - // Consume the [hH]. - if (LexMasmIntegers && Radix == 16) - ++CurPtr; + if (Result.getAsInteger(Radix, Value)) { + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); + } // The darwin/x86 (and x86-64) assembler accepts and ignores type // suffices on integer literals. @@ -416,11 +508,9 @@ AsmToken AsmLexer::LexDigit() { // Either octal or hexadecimal. APInt Value(128, 0, true); unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); - bool isHex = Radix == 16; StringRef Result(TokStart, CurPtr - TokStart); if (Result.getAsInteger(Radix, Value)) - return ReturnError(TokStart, !isHex ? "invalid octal number" : - "invalid hexdecimal number"); + return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); // Consume the [hH]. if (Radix == 16) @@ -437,6 +527,24 @@ AsmToken AsmLexer::LexDigit() { AsmToken AsmLexer::LexSingleQuote() { int CurChar = getNextChar(); + if (LexMasmStrings) { + while (CurChar != EOF) { + if (CurChar != '\'') { + CurChar = getNextChar(); + } else if (peekNextChar() == '\'') { + // In MASM single-quote strings, doubled single-quotes mean an escaped + // single quote, so should be lexed in. + getNextChar(); + CurChar = getNextChar(); + } else { + break; + } + } + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); + } + if (CurChar == '\\') CurChar = getNextChar(); @@ -471,6 +579,24 @@ AsmToken AsmLexer::LexSingleQuote() { /// LexQuote: String: "..." AsmToken AsmLexer::LexQuote() { int CurChar = getNextChar(); + if (LexMasmStrings) { + while (CurChar != EOF) { + if (CurChar != '"') { + CurChar = getNextChar(); + } else if (peekNextChar() == '"') { + // In MASM double-quoted strings, doubled double-quotes mean an escaped + // double quote, so should be lexed in. + getNextChar(); + CurChar = getNextChar(); + } else { + break; + } + } + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); + } + // TODO: does gas allow multiline string constants? while (CurChar != '"') { if (CurChar == '\\') { @@ -589,7 +715,7 @@ AsmToken AsmLexer::LexToken() { if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) { IsAtStartOfLine = true; IsAtStartOfStatement = true; - return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } IsAtStartOfLine = false; bool OldIsAtStartOfStatement = IsAtStartOfStatement; |