summaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2012-04-14 14:01:31 +0000
committerDimitry Andric <dim@FreeBSD.org>2012-04-14 14:01:31 +0000
commitdbe13110f59f48b4dbb7552b3ac2935acdeece7f (patch)
treebe1815eb79b42ff482a8562b13c2dcbf0c5dcbee /lib/Lex/Lexer.cpp
parent9da628931ebf2609493570f87824ca22402cc65f (diff)
downloadsrc-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.tar.gz
src-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.zip
Notes
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp438
1 files changed, 348 insertions, 90 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a98d889dbc98..535a852429b5 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -30,6 +30,7 @@
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
@@ -59,6 +60,8 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
// Lexer Class Implementation
//===----------------------------------------------------------------------===//
+void Lexer::anchor() { }
+
void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
const char *BufEnd) {
InitCharacterInfo();
@@ -114,7 +117,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
: PreprocessorLexer(&PP, FID),
FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
- Features(PP.getLangOptions()) {
+ LangOpts(PP.getLangOpts()) {
InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
InputFile->getBufferEnd());
@@ -126,9 +129,9 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
/// Lexer constructor - Create a new raw lexer object. This object is only
/// suitable for calls to 'LexRawToken'. This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
-Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
const char *BufStart, const char *BufPtr, const char *BufEnd)
- : FileLoc(fileloc), Features(features) {
+ : FileLoc(fileloc), LangOpts(langOpts) {
InitLexer(BufStart, BufPtr, BufEnd);
@@ -140,8 +143,8 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
/// suitable for calls to 'LexRawToken'. This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
- const SourceManager &SM, const LangOptions &features)
- : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) {
+ const SourceManager &SM, const LangOptions &langOpts)
+ : FileLoc(SM.getLocForStartOfFile(FID)), LangOpts(langOpts) {
InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(),
FromFile->getBufferEnd());
@@ -284,7 +287,7 @@ StringRef Lexer::getSpelling(SourceLocation loc,
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
- const LangOptions &Features, bool *Invalid) {
+ const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
// If this token contains nothing interesting, return it directly.
@@ -306,7 +309,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
Ptr != End; ) {
unsigned CharSize;
- Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+ Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
Ptr += CharSize;
}
assert(Result.size() != unsigned(Tok.getLength()) &&
@@ -326,7 +329,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
/// if an internal buffer is returned.
unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,
- const LangOptions &Features, bool *Invalid) {
+ const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
const char *TokStart = 0;
@@ -366,7 +369,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
Ptr != End; ) {
unsigned CharSize;
- *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+ *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
Ptr += CharSize;
}
assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
@@ -487,11 +490,11 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
SourceLocation FileLoc = SM.getSpellingLoc(Loc);
SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
- std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc);
+ std::pair<FileID, unsigned> BeginFileLocInfo
+ = SM.getDecomposedLoc(BeginFileLoc);
assert(FileLocInfo.first == BeginFileLocInfo.first &&
FileLocInfo.second >= BeginFileLocInfo.second);
- return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second -
- SM.getDecomposedLoc(FileLoc).second);
+ return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
}
namespace {
@@ -505,13 +508,13 @@ namespace {
std::pair<unsigned, bool>
Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
- const LangOptions &Features, unsigned MaxLines) {
+ const LangOptions &LangOpts, unsigned MaxLines) {
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
const unsigned StartOffset = 1;
SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
- Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(),
+ Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
Buffer->getBufferStart(), Buffer->getBufferEnd());
bool InPreprocessorDirective = false;
@@ -655,7 +658,7 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned CharNo,
const SourceManager &SM,
- const LangOptions &Features) {
+ const LangOptions &LangOpts) {
// Figure out how many physical characters away the specified expansion
// character is. This needs to take into consideration newlines and
// trigraphs.
@@ -681,7 +684,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
// lexer to parse it correctly.
for (; CharNo; --CharNo) {
unsigned Size;
- Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+ Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
TokPtr += Size;
PhysOffset += Size;
}
@@ -713,19 +716,16 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
/// a source location pointing to the last character in the token, etc.
SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
const SourceManager &SM,
- const LangOptions &Features) {
+ const LangOptions &LangOpts) {
if (Loc.isInvalid())
return SourceLocation();
if (Loc.isMacroID()) {
- if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, Features))
+ if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
return SourceLocation(); // Points inside the macro expansion.
-
- // Continue and find the location just after the macro expansion.
- Loc = SM.getExpansionRange(Loc).second;
}
- unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
+ unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
if (Len > Offset)
Len = Len - Offset;
else
@@ -738,7 +738,8 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
const SourceManager &SM,
- const LangOptions &LangOpts) {
+ const LangOptions &LangOpts,
+ SourceLocation *MacroBegin) {
assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);
@@ -749,17 +750,22 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
SourceLocation expansionLoc =
SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();
- if (expansionLoc.isFileID())
- return true; // No other macro expansions, this is the first.
+ if (expansionLoc.isFileID()) {
+ // No other macro expansions, this is the first.
+ if (MacroBegin)
+ *MacroBegin = expansionLoc;
+ return true;
+ }
- return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts);
+ return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin);
}
/// \brief Returns true if the given MacroID location points at the last
/// token of the macro expansion.
bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
- const SourceManager &SM,
- const LangOptions &LangOpts) {
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ SourceLocation *MacroEnd) {
assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
SourceLocation spellLoc = SM.getSpellingLoc(loc);
@@ -777,10 +783,192 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
SourceLocation expansionLoc =
SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();
- if (expansionLoc.isFileID())
- return true; // No other macro expansions.
+ if (expansionLoc.isFileID()) {
+ // No other macro expansions.
+ if (MacroEnd)
+ *MacroEnd = expansionLoc;
+ return true;
+ }
+
+ return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd);
+}
+
+static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ SourceLocation Begin = Range.getBegin();
+ SourceLocation End = Range.getEnd();
+ assert(Begin.isFileID() && End.isFileID());
+ if (Range.isTokenRange()) {
+ End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
+ if (End.isInvalid())
+ return CharSourceRange();
+ }
+
+ // Break down the source locations.
+ FileID FID;
+ unsigned BeginOffs;
+ llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
+ if (FID.isInvalid())
+ return CharSourceRange();
+
+ unsigned EndOffs;
+ if (!SM.isInFileID(End, FID, &EndOffs) ||
+ BeginOffs > EndOffs)
+ return CharSourceRange();
+
+ return CharSourceRange::getCharRange(Begin, End);
+}
+
+/// \brief Accepts a range and returns a character range with file locations.
+///
+/// Returns a null range if a part of the range resides inside a macro
+/// expansion or the range does not reside on the same FileID.
+CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ SourceLocation Begin = Range.getBegin();
+ SourceLocation End = Range.getEnd();
+ if (Begin.isInvalid() || End.isInvalid())
+ return CharSourceRange();
+
+ if (Begin.isFileID() && End.isFileID())
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+
+ if (Begin.isMacroID() && End.isFileID()) {
+ if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin))
+ return CharSourceRange();
+ Range.setBegin(Begin);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ if (Begin.isFileID() && End.isMacroID()) {
+ if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
+ &End)) ||
+ (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
+ &End)))
+ return CharSourceRange();
+ Range.setEnd(End);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ assert(Begin.isMacroID() && End.isMacroID());
+ SourceLocation MacroBegin, MacroEnd;
+ if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
+ ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
+ &MacroEnd)) ||
+ (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
+ &MacroEnd)))) {
+ Range.setBegin(MacroBegin);
+ Range.setEnd(MacroEnd);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
- return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts);
+ FileID FID;
+ unsigned BeginOffs;
+ llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
+ if (FID.isInvalid())
+ return CharSourceRange();
+
+ unsigned EndOffs;
+ if (!SM.isInFileID(End, FID, &EndOffs) ||
+ BeginOffs > EndOffs)
+ return CharSourceRange();
+
+ const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
+ const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
+ if (Expansion.isMacroArgExpansion() &&
+ Expansion.getSpellingLoc().isFileID()) {
+ SourceLocation SpellLoc = Expansion.getSpellingLoc();
+ Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs));
+ Range.setEnd(SpellLoc.getLocWithOffset(EndOffs));
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ return CharSourceRange();
+}
+
+StringRef Lexer::getSourceText(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool *Invalid) {
+ Range = makeFileCharRange(Range, SM, LangOpts);
+ if (Range.isInvalid()) {
+ if (Invalid) *Invalid = true;
+ return StringRef();
+ }
+
+ // Break down the source location.
+ std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin());
+ if (beginInfo.first.isInvalid()) {
+ if (Invalid) *Invalid = true;
+ return StringRef();
+ }
+
+ unsigned EndOffs;
+ if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
+ beginInfo.second > EndOffs) {
+ if (Invalid) *Invalid = true;
+ return StringRef();
+ }
+
+ // Try to the load the file buffer.
+ bool invalidTemp = false;
+ StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
+ if (invalidTemp) {
+ if (Invalid) *Invalid = true;
+ return StringRef();
+ }
+
+ if (Invalid) *Invalid = false;
+ return file.substr(beginInfo.second, EndOffs - beginInfo.second);
+}
+
+StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ assert(Loc.isMacroID() && "Only reasonble to call this on macros");
+
+ // Find the location of the immediate macro expansion.
+ while (1) {
+ FileID FID = SM.getFileID(Loc);
+ const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
+ const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
+ Loc = Expansion.getExpansionLocStart();
+ if (!Expansion.isMacroArgExpansion())
+ break;
+
+ // For macro arguments we need to check that the argument did not come
+ // from an inner macro, e.g: "MAC1( MAC2(foo) )"
+
+ // Loc points to the argument id of the macro definition, move to the
+ // macro expansion.
+ Loc = SM.getImmediateExpansionRange(Loc).first;
+ SourceLocation SpellLoc = Expansion.getSpellingLoc();
+ if (SpellLoc.isFileID())
+ break; // No inner macro.
+
+ // If spelling location resides in the same FileID as macro expansion
+ // location, it means there is no inner macro.
+ FileID MacroFID = SM.getFileID(Loc);
+ if (SM.isInFileID(SpellLoc, MacroFID))
+ break;
+
+ // Argument came from inner macro.
+ Loc = SpellLoc;
+ }
+
+ // Find the spelling location of the start of the non-argument expansion
+ // range. This is where the macro name was spelled in order to begin
+ // expanding this macro.
+ Loc = SM.getSpellingLoc(Loc);
+
+ // Dig out the buffer where the macro name was spelled and the extents of the
+ // name so that we can render it into the expansion note.
+ std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
+ unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+ StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
+ return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}
//===----------------------------------------------------------------------===//
@@ -890,6 +1078,12 @@ static void InitCharacterInfo() {
}
+/// isIdentifierHead - Return true if this is the first character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierHead(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
+
/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {
@@ -1018,7 +1212,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {
char Res = GetTrigraphCharForLetter(*CP);
if (!Res || !L) return Res;
- if (!L->getFeatures().Trigraphs) {
+ if (!L->getLangOpts().Trigraphs) {
if (!L->isLexingRawMode())
L->Diag(CP-2, diag::trigraph_ignored);
return 0;
@@ -1085,9 +1279,8 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
if (Loc.isMacroID()) {
- if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts))
+ if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
return SourceLocation();
- Loc = SM.getExpansionRange(Loc).second;
}
Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
@@ -1169,6 +1362,13 @@ Slash:
// Found backslash<whitespace><newline>. Parse the char after it.
Size += EscapedNewLineSize;
Ptr += EscapedNewLineSize;
+
+ // If the char that we finally got was a \n, then we must have had
+ // something like \<newline><newline>. We don't want to consume the
+ // second newline.
+ if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
+ return ' ';
+
// Use slow version to accumulate a correct size field.
return getCharAndSizeSlow(Ptr, Size, Tok);
}
@@ -1205,7 +1405,7 @@ Slash:
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
- const LangOptions &Features) {
+ const LangOptions &LangOpts) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
++Size;
@@ -1220,8 +1420,14 @@ Slash:
Size += EscapedNewLineSize;
Ptr += EscapedNewLineSize;
+ // If the char that we finally got was a \n, then we must have had
+ // something like \<newline><newline>. We don't want to consume the
+ // second newline.
+ if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
+ return ' ';
+
// Use slow version to accumulate a correct size field.
- return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+ return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}
// Otherwise, this is not an escaped newline, just return the slash.
@@ -1229,7 +1435,7 @@ Slash:
}
// If this is a trigraph, process it.
- if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+ if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
// If this is actually a legal trigraph (not something like "??x"), return
// it.
if (char C = GetTrigraphCharForLetter(Ptr[2])) {
@@ -1272,7 +1478,7 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
//
// TODO: Could merge these checks into a CharInfo flag to make the comparison
// cheaper
- if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+ if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) {
FinishIdentifier:
const char *IdStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
@@ -1301,7 +1507,7 @@ FinishIdentifier:
while (1) {
if (C == '$') {
// If we hit a $ and they are not supported in identifiers, we are done.
- if (!Features.DollarIdents) goto FinishIdentifier;
+ if (!LangOpts.DollarIdents) goto FinishIdentifier;
// Otherwise, emit a diagnostic and continue.
if (!isLexingRawMode())
@@ -1327,12 +1533,12 @@ FinishIdentifier:
/// isHexaLiteral - Return true if Start points to a hex constant.
/// in microsoft mode (where this is supposed to be several different tokens).
-static bool isHexaLiteral(const char *Start, const LangOptions &Features) {
+static bool isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
unsigned Size;
- char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, Features);
+ char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
if (C1 != '0')
return false;
- char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, Features);
+ char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
return (C2 == 'x' || C2 == 'X');
}
@@ -1343,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
char PrevCh = 0;
- while (isNumberBody(C)) { // FIXME: UCNs?
+ while (isNumberBody(C)) { // FIXME: UCNs.
CurPtr = ConsumeChar(CurPtr, Size, Result);
PrevCh = C;
C = getCharAndSize(CurPtr, Size);
@@ -1353,7 +1559,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
// If we are in Microsoft mode, don't continue if the constant is hex.
// For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
- if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features))
+ if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
}
@@ -1367,6 +1573,46 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
Result.setLiteralData(TokStart);
}
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11, or warn on a ud-suffix in C++98.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
+ assert(getLangOpts().CPlusPlus);
+
+ // Maximally munch an identifier. FIXME: UCNs.
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ if (isIdentifierHead(C)) {
+ if (!getLangOpts().CPlusPlus0x) {
+ if (!isLexingRawMode())
+ Diag(CurPtr,
+ C == '_' ? diag::warn_cxx11_compat_user_defined_literal
+ : diag::warn_cxx11_compat_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ return CurPtr;
+ }
+
+ // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
+ // that does not start with an underscore is ill-formed. As a conforming
+ // extension, we treat all such suffixes as if they had whitespace before
+ // them.
+ if (C != '_') {
+ if (!isLexingRawMode())
+ Diag(CurPtr, getLangOpts().MicrosoftMode ?
+ diag::ext_ms_reserved_user_defined_literal :
+ diag::ext_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ return CurPtr;
+ }
+
+ Result.setFlag(Token::HasUDSuffix);
+ do {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ } while (isIdentifierBody(C));
+ }
+ return CurPtr;
+}
+
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L" or u8" or u" or U".
void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
@@ -1388,7 +1634,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- if (!isLexingRawMode() && !Features.AsmPreprocessor)
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::warn_unterminated_string);
FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return;
@@ -1406,6 +1652,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the string, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_string);
@@ -1485,6 +1735,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
}
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, Kind);
@@ -1538,7 +1792,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
- if (!isLexingRawMode() && !Features.AsmPreprocessor)
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::err_empty_character);
FormTokenWithChars(Result, CurPtr, tok::unknown);
return;
@@ -1552,7 +1806,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- if (!isLexingRawMode() && !Features.AsmPreprocessor)
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
Diag(BufferPtr, diag::warn_unterminated_char);
FormTokenWithChars(Result, CurPtr-1, tok::unknown);
return;
@@ -1568,6 +1822,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the character, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_char);
@@ -1633,12 +1891,12 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
// If BCPL comments aren't explicitly enabled for this language, emit an
// extension warning.
- if (!Features.BCPLComment && !isLexingRawMode()) {
+ if (!LangOpts.BCPLComment && !isLexingRawMode()) {
Diag(BufferPtr, diag::ext_bcpl_comment);
// Mark them enabled so we only emit one warning for this translation
// unit.
- Features.BCPLComment = true;
+ LangOpts.BCPLComment = true;
}
// Scan over the body of the comment. The common case, when scanning, is that
@@ -1687,14 +1945,6 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
break;
}
- // If the char that we finally got was a \n, then we must have had something
- // like \<newline><newline>. We don't want to have consumed the second
- // newline, we want CurPtr, to end up pointing to it down below.
- if (C == '\n' || C == '\r') {
- --CurPtr;
- C = 'x'; // doesn't matter what this is.
- }
-
// If we read multiple characters, and one of those characters was a \r or
// \n, then we had an escaped newline within the comment. Emit diagnostic
// unless the next line is also a // comment.
@@ -1833,7 +2083,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
// If no trigraphs are enabled, warn that we ignored this trigraph and
// ignore this * character.
- if (!L->getFeatures().Trigraphs) {
+ if (!L->getLangOpts().Trigraphs) {
if (!L->isLexingRawMode())
L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
return false;
@@ -1916,11 +2166,18 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
if (C == '/') goto FoundSlash;
#ifdef __SSE2__
- __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
- '/', '/', '/', '/', '/', '/', '/', '/');
- while (CurPtr+16 <= BufferEnd &&
- _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
+ __m128i Slashes = _mm_set1_epi8('/');
+ while (CurPtr+16 <= BufferEnd) {
+ int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes));
+ if (cmp != 0) {
+ // Adjust the pointer to point directly after the first slash. It's
+ // not necessary to set C here, it will be overwritten at the end of
+ // the outer loop.
+ CurPtr += llvm::CountTrailingZeros_32(cmp) + 1;
+ goto FoundSlash;
+ }
CurPtr += 16;
+ }
#elif __ALTIVEC__
__vector unsigned char Slashes = {
'/', '/', '/', '/', '/', '/', '/', '/',
@@ -1948,8 +2205,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
while (C != '/' && C != '\0')
C = *CurPtr++;
- FoundSlash:
if (C == '/') {
+ FoundSlash:
if (CurPtr[-2] == '*') // We found the final */. We're done!
break;
@@ -2119,8 +2376,9 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
// C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
// a pedwarn.
if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
- Diag(BufferEnd, diag::ext_no_newline_eof)
- << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
+ Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2
+ diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof)
+ << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
BufferPtr = CurPtr;
@@ -2345,7 +2603,7 @@ LexNextToken:
case 26: // DOS & CP/M EOF: "^Z".
// If we're in Microsoft extensions mode, treat this as end of file.
- if (Features.MicrosoftExt) {
+ if (LangOpts.MicrosoftExt) {
// Read the PP instance variable into an automatic variable, because
// LexEndOfFile will often delete 'this'.
Preprocessor *PPCache = PP;
@@ -2398,7 +2656,7 @@ LexNextToken:
// If the next token is obviously a // or /* */ comment, skip it efficiently
// too (without going through the big switch stmt).
if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
- Features.BCPLComment && !Features.TraditionalCPP) {
+ LangOpts.BCPLComment && !LangOpts.TraditionalCPP) {
if (SkipBCPLComment(Result, CurPtr+2))
return; // There is a token to return.
goto SkipIgnoredUnits;
@@ -2423,7 +2681,7 @@ LexNextToken:
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (Features.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus0x) {
Char = getCharAndSize(CurPtr, SizeTmp);
// UTF-16 string literal
@@ -2475,7 +2733,7 @@ LexNextToken:
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (Features.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus0x) {
Char = getCharAndSize(CurPtr, SizeTmp);
// UTF-32 string literal
@@ -2503,7 +2761,7 @@ LexNextToken:
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- if (Features.CPlusPlus0x) {
+ if (LangOpts.CPlusPlus0x) {
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '"')
@@ -2526,7 +2784,7 @@ LexNextToken:
tok::wide_string_literal);
// Wide raw string literal.
- if (Features.CPlusPlus0x && Char == 'R' &&
+ if (LangOpts.CPlusPlus0x && Char == 'R' &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -2554,7 +2812,7 @@ LexNextToken:
return LexIdentifier(Result, CurPtr);
case '$': // $ in identifiers.
- if (Features.DollarIdents) {
+ if (LangOpts.DollarIdents) {
if (!isLexingRawMode())
Diag(CurPtr-1, diag::ext_dollar_in_identifier);
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -2606,7 +2864,7 @@ LexNextToken:
MIOpt.ReadToken();
return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
- } else if (Features.CPlusPlus && Char == '*') {
+ } else if (LangOpts.CPlusPlus && Char == '*') {
Kind = tok::periodstar;
CurPtr += SizeTmp;
} else if (Char == '.' &&
@@ -2655,7 +2913,7 @@ LexNextToken:
if (Char == '-') { // --
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::minusminus;
- } else if (Char == '>' && Features.CPlusPlus &&
+ } else if (Char == '>' && LangOpts.CPlusPlus &&
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->*
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
@@ -2693,9 +2951,9 @@ LexNextToken:
// "foo". Check to see if the character after the second slash is a '*'.
// If so, we will lex that as a "/" instead of the start of a comment.
// However, we never do this in -traditional-cpp mode.
- if ((Features.BCPLComment ||
+ if ((LangOpts.BCPLComment ||
getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&
- !Features.TraditionalCPP) {
+ !LangOpts.TraditionalCPP) {
if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
return; // There is a token to return.
@@ -2724,20 +2982,20 @@ LexNextToken:
if (Char == '=') {
Kind = tok::percentequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Features.Digraphs && Char == '>') {
+ } else if (LangOpts.Digraphs && Char == '>') {
Kind = tok::r_brace; // '%>' -> '}'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Features.Digraphs && Char == ':') {
+ } else if (LangOpts.Digraphs && Char == ':') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
Kind = tok::hashhash; // '%:%:' -> '##'
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
- } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize
+ } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
if (!isLexingRawMode())
- Diag(BufferPtr, diag::charize_microsoft_ext);
+ Diag(BufferPtr, diag::ext_charize_microsoft);
Kind = tok::hashat;
} else { // '%:' -> '#'
// We parsed a # character. If this occurs at the start of the line,
@@ -2789,7 +3047,7 @@ LexNextToken:
// If this is '<<<<' and we're in a Perforce-style conflict marker,
// ignore it.
goto LexNextToken;
- } else if (Features.CUDA && After == '<') {
+ } else if (LangOpts.CUDA && After == '<') {
Kind = tok::lesslessless;
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
@@ -2800,8 +3058,8 @@ LexNextToken:
} else if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessequal;
- } else if (Features.Digraphs && Char == ':') { // '<:' -> '['
- if (Features.CPlusPlus0x &&
+ } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
+ if (LangOpts.CPlusPlus0x &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
// C++0x [lex.pptoken]p3:
// Otherwise, if the next three characters are <:: and the subsequent
@@ -2820,7 +3078,7 @@ LexNextToken:
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::l_square;
- } else if (Features.Digraphs && Char == '%') { // '<%' -> '{'
+ } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::l_brace;
} else {
@@ -2845,7 +3103,7 @@ LexNextToken:
} else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
// If this is '>>>>>>>' and we're in a conflict marker, ignore it.
goto LexNextToken;
- } else if (Features.CUDA && After == '>') {
+ } else if (LangOpts.CUDA && After == '>') {
Kind = tok::greatergreatergreater;
CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result);
@@ -2884,10 +3142,10 @@ LexNextToken:
break;
case ':':
Char = getCharAndSize(CurPtr, SizeTmp);
- if (Features.Digraphs && Char == '>') {
+ if (LangOpts.Digraphs && Char == '>') {
Kind = tok::r_square; // ':>' -> ']'
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Features.CPlusPlus && Char == ':') {
+ } else if (LangOpts.CPlusPlus && Char == ':') {
Kind = tok::coloncolon;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {
@@ -2918,10 +3176,10 @@ LexNextToken:
if (Char == '#') {
Kind = tok::hashhash;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- } else if (Char == '@' && Features.MicrosoftExt) { // #@ -> Charize
+ } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize
Kind = tok::hashat;
if (!isLexingRawMode())
- Diag(BufferPtr, diag::charize_microsoft_ext);
+ Diag(BufferPtr, diag::ext_charize_microsoft);
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {
// We parsed a # character. If this occurs at the start of the line,
@@ -2954,7 +3212,7 @@ LexNextToken:
case '@':
// Objective C support.
- if (CurPtr[-1] == '@' && Features.ObjC1)
+ if (CurPtr[-1] == '@' && LangOpts.ObjC1)
Kind = tok::at;
else
Kind = tok::unknown;