summaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp103
1 files changed, 60 insertions, 43 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 830354ab23f0d..e8588a771a431 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -257,7 +257,7 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); }
// Token Spelling
//===----------------------------------------------------------------------===//
-/// \brief Slow case of getSpelling. Extract the characters comprising the
+/// Slow case of getSpelling. Extract the characters comprising the
/// spelling of this token from the provided input buffer.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
const LangOptions &LangOpts, char *Spelling) {
@@ -442,7 +442,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
return TheTok.getLength();
}
-/// \brief Relex the token at the specified location.
+/// Relex the token at the specified location.
/// \returns true if there was a failure, false on success.
bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
const SourceManager &SM,
@@ -708,12 +708,9 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
TheTok.isAtStartOfLine());
}
-/// AdvanceToTokenCharacter - Given a location that specifies the start of a
-/// token, return a new location that specifies a character within the token.
-SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
- unsigned CharNo,
- const SourceManager &SM,
- const LangOptions &LangOpts) {
+unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
// Figure out how many physical characters away the specified expansion
// character is. This needs to take into consideration newlines and
// trigraphs.
@@ -722,7 +719,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
// If they request the first char of the token, we're trivially done.
if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
- return TokStart;
+ return 0;
unsigned PhysOffset = 0;
@@ -731,7 +728,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
// chars, this method is extremely fast.
while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
if (CharNo == 0)
- return TokStart.getLocWithOffset(PhysOffset);
+ return PhysOffset;
++TokPtr;
--CharNo;
++PhysOffset;
@@ -753,10 +750,10 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
- return TokStart.getLocWithOffset(PhysOffset);
+ return PhysOffset;
}
-/// \brief Computes the source location just past the end of the
+/// Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
@@ -791,7 +788,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
return Loc.getLocWithOffset(Len);
}
-/// \brief Returns true if the given MacroID location points at the first
+/// Returns true if the given MacroID location points at the first
/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
const SourceManager &SM,
@@ -813,7 +810,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin);
}
-/// \brief Returns true if the given MacroID location points at the last
+/// Returns true if the given MacroID location points at the last
/// token of the macro expansion.
bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
const SourceManager &SM,
@@ -971,7 +968,7 @@ StringRef Lexer::getSourceText(CharSourceRange Range,
StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts) {
- assert(Loc.isMacroID() && "Only reasonble to call this on macros");
+ assert(Loc.isMacroID() && "Only reasonable to call this on macros");
// Find the location of the immediate macro expansion.
while (true) {
@@ -987,7 +984,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
// Loc points to the argument id of the macro definition, move to the
// macro expansion.
- Loc = SM.getImmediateExpansionRange(Loc).first;
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
SourceLocation SpellLoc = Expansion.getSpellingLoc();
if (SpellLoc.isFileID())
break; // No inner macro.
@@ -1017,10 +1014,10 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
StringRef Lexer::getImmediateMacroNameForDiagnostics(
SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
- assert(Loc.isMacroID() && "Only reasonble to call this on macros");
+ assert(Loc.isMacroID() && "Only reasonable to call this on macros");
// Walk past macro argument expanions.
while (SM.isMacroArgExpansion(Loc))
- Loc = SM.getImmediateExpansionRange(Loc).first;
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
// If the macro's spelling has no FileID, then it's actually a token paste
// or stringization (or similar) and not a macro at all.
@@ -1030,7 +1027,7 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics(
// Find the spelling location of the start of the non-argument expansion
// range. This is where the macro name was spelled in order to begin
// expanding this macro.
- Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first);
+ Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).getBegin());
// Dig out the buffer where the macro name was spelled and the extents of the
// name so that we can render it into the expansion note.
@@ -1112,10 +1109,9 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
// Figure out the expansion loc range, which is the range covered by the
// original _Pragma(...) sequence.
- std::pair<SourceLocation,SourceLocation> II =
- SM.getImmediateExpansionRange(FileLoc);
+ CharSourceRange II = SM.getImmediateExpansionRange(FileLoc);
- return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
+ return SM.createExpansionLoc(SpellingLoc, II.getBegin(), II.getEnd(), TokLen);
}
/// getSourceLocation - Return a source location identifier for the specified
@@ -1260,7 +1256,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc,
return Tok;
}
-/// \brief Checks that the given token is the first token that occurs after the
+/// Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
/// location is inside a macro, the returned source location will be invalid.
@@ -1413,7 +1409,7 @@ Slash:
// Helper methods for lexing.
//===----------------------------------------------------------------------===//
-/// \brief Routine that indiscriminately sets the offset into the source file.
+/// Routine that indiscriminately sets the offset into the source file.
void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
BufferPtr = BufferStart + Offset;
if (BufferPtr > BufferEnd)
@@ -1645,20 +1641,38 @@ FinishIdentifier:
// Fill in Result.IdentifierInfo and update the token kind,
// looking up the identifier in the identifier table.
IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+ // Note that we have to call PP->LookUpIdentifierInfo() even for code
+ // completion, it writes IdentifierInfo into Result, and callers rely on it.
+
+ // If the completion point is at the end of an identifier, we want to treat
+ // the identifier as incomplete even if it resolves to a macro or a keyword.
+ // This allows e.g. 'class^' to complete to 'classifier'.
+ if (isCodeCompletionPoint(CurPtr)) {
+ // Return the code-completion token.
+ Result.setKind(tok::code_completion);
+ // Skip the code-completion char and all immediate identifier characters.
+ // This ensures we get consistent behavior when completing at any point in
+ // an identifier (i.e. at the start, in the middle, at the end). Note that
+ // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code
+ // simpler.
+ assert(*CurPtr == 0 && "Completion character must be 0");
+ ++CurPtr;
+ // Note that code completion token is not added as a separate character
+ // when the completion point is at the end of the buffer. Therefore, we need
+ // to check if the buffer has ended.
+ if (CurPtr < BufferEnd) {
+ while (isIdentifierBody(*CurPtr))
+ ++CurPtr;
+ }
+ BufferPtr = CurPtr;
+ return true;
+ }
// Finally, now that we know we have an identifier, pass this off to the
// preprocessor, which may macro expand it or something.
if (II->isHandleIdentifierCase())
return PP->HandleIdentifier(Result);
- if (II->getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr)
- && II->getPPKeywordID() == tok::pp_not_keyword
- && II->getObjCKeywordID() == tok::objc_not_keyword) {
- // Return the code-completion token.
- Result.setKind(tok::code_completion);
- cutOffLexing();
- return true;
- }
return true;
}
@@ -2009,18 +2023,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
const char *AfterLessPos = CurPtr;
char C = getAndAdvanceChar(CurPtr, Result);
while (C != '>') {
- // Skip escaped characters.
- if (C == '\\' && CurPtr < BufferEnd) {
- // Skip the escaped character.
- getAndAdvanceChar(CurPtr, Result);
- } else if (C == '\n' || C == '\r' || // Newline.
- (C == 0 && (CurPtr-1 == BufferEnd || // End of file.
- isCodeCompletionPoint(CurPtr-1)))) {
+ // Skip escaped characters. Escaped newlines will already be processed by
+ // getAndAdvanceChar.
+ if (C == '\\')
+ C = getAndAdvanceChar(CurPtr, Result);
+
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && (CurPtr-1 == BufferEnd || // End of file.
+ isCodeCompletionPoint(CurPtr-1)))) {
// If the filename is unterminated, then it must just be a lone <
// character. Return this as such.
FormTokenWithChars(Result, AfterLessPos, tok::less);
return true;
- } else if (C == 0) {
+ }
+
+ if (C == 0) {
NulCharacter = CurPtr-1;
}
C = getAndAdvanceChar(CurPtr, Result);
@@ -2160,7 +2177,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
}
/// We have just read the // characters from input. Skip until we find the
-/// newline character thats terminate the comment. Then update BufferPtr and
+/// newline character that terminates the comment. Then update BufferPtr and
/// return.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
@@ -2738,7 +2755,7 @@ unsigned Lexer::isNextPPTokenLParen() {
return Tok.is(tok::l_paren);
}
-/// \brief Find the end of a version control conflict marker.
+/// Find the end of a version control conflict marker.
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
ConflictMarkerKind CMK) {
const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";
@@ -3509,7 +3526,7 @@ LexNextToken:
// want to lex this as a comment. There is one problem with this though,
// that in one particular corner case, this can change the behavior of the
// resultant program. For example, In "foo //**/ bar", C89 would lex
- // this as "foo / bar" and langauges with Line comments would lex it as
+ // this as "foo / bar" and languages with Line comments would lex it as
// "foo". Check to see if the character after the second slash is a '*'.
// If so, we will lex that as a "/" instead of the start of a comment.
// However, we never do this if we are just preprocessing.