diff options
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 103 |
1 files changed, 60 insertions, 43 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 830354ab23f0d..e8588a771a431 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -257,7 +257,7 @@ void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); } // Token Spelling //===----------------------------------------------------------------------===// -/// \brief Slow case of getSpelling. Extract the characters comprising the +/// Slow case of getSpelling. Extract the characters comprising the /// spelling of this token from the provided input buffer. static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling) { @@ -442,7 +442,7 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc, return TheTok.getLength(); } -/// \brief Relex the token at the specified location. +/// Relex the token at the specified location. /// \returns true if there was a failure, false on success. bool Lexer::getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, @@ -708,12 +708,9 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, TheTok.isAtStartOfLine()); } -/// AdvanceToTokenCharacter - Given a location that specifies the start of a -/// token, return a new location that specifies a character within the token. -SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, - unsigned CharNo, - const SourceManager &SM, - const LangOptions &LangOpts) { +unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, + const SourceManager &SM, + const LangOptions &LangOpts) { // Figure out how many physical characters away the specified expansion // character is. This needs to take into consideration newlines and // trigraphs. @@ -722,7 +719,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, // If they request the first char of the token, we're trivially done. if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) - return TokStart; + return 0; unsigned PhysOffset = 0; @@ -731,7 +728,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, // chars, this method is extremely fast. while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { if (CharNo == 0) - return TokStart.getLocWithOffset(PhysOffset); + return PhysOffset; ++TokPtr; --CharNo; ++PhysOffset; @@ -753,10 +750,10 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; - return TokStart.getLocWithOffset(PhysOffset); + return PhysOffset; } -/// \brief Computes the source location just past the end of the +/// Computes the source location just past the end of the /// token at this source location. /// /// This routine can be used to produce a source location that @@ -791,7 +788,7 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, return Loc.getLocWithOffset(Len); } -/// \brief Returns true if the given MacroID location points at the first +/// Returns true if the given MacroID location points at the first /// token of the macro expansion. bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, @@ -813,7 +810,7 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin); } -/// \brief Returns true if the given MacroID location points at the last +/// Returns true if the given MacroID location points at the last /// token of the macro expansion. bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, @@ -971,7 +968,7 @@ StringRef Lexer::getSourceText(CharSourceRange Range, StringRef Lexer::getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - assert(Loc.isMacroID() && "Only reasonble to call this on macros"); + assert(Loc.isMacroID() && "Only reasonable to call this on macros"); // Find the location of the immediate macro expansion. while (true) { @@ -987,7 +984,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, // Loc points to the argument id of the macro definition, move to the // macro expansion. - Loc = SM.getImmediateExpansionRange(Loc).first; + Loc = SM.getImmediateExpansionRange(Loc).getBegin(); SourceLocation SpellLoc = Expansion.getSpellingLoc(); if (SpellLoc.isFileID()) break; // No inner macro. @@ -1017,10 +1014,10 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc, StringRef Lexer::getImmediateMacroNameForDiagnostics( SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - assert(Loc.isMacroID() && "Only reasonble to call this on macros"); + assert(Loc.isMacroID() && "Only reasonable to call this on macros"); // Walk past macro argument expanions. while (SM.isMacroArgExpansion(Loc)) - Loc = SM.getImmediateExpansionRange(Loc).first; + Loc = SM.getImmediateExpansionRange(Loc).getBegin(); // If the macro's spelling has no FileID, then it's actually a token paste // or stringization (or similar) and not a macro at all. @@ -1030,7 +1027,7 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics( // Find the spelling location of the start of the non-argument expansion // range. This is where the macro name was spelled in order to begin // expanding this macro. - Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first); + Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).getBegin()); // Dig out the buffer where the macro name was spelled and the extents of the // name so that we can render it into the expansion note. @@ -1112,10 +1109,9 @@ static SourceLocation GetMappedTokenLoc(Preprocessor &PP, // Figure out the expansion loc range, which is the range covered by the // original _Pragma(...) sequence. - std::pair<SourceLocation,SourceLocation> II = - SM.getImmediateExpansionRange(FileLoc); + CharSourceRange II = SM.getImmediateExpansionRange(FileLoc); - return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen); + return SM.createExpansionLoc(SpellingLoc, II.getBegin(), II.getEnd(), TokLen); } /// getSourceLocation - Return a source location identifier for the specified @@ -1260,7 +1256,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc, return Tok; } -/// \brief Checks that the given token is the first token that occurs after the +/// Checks that the given token is the first token that occurs after the /// given location (this excludes comments and whitespace). Returns the location /// immediately after the specified token. If the token is not found or the /// location is inside a macro, the returned source location will be invalid. @@ -1413,7 +1409,7 @@ Slash: // Helper methods for lexing. //===----------------------------------------------------------------------===// -/// \brief Routine that indiscriminately sets the offset into the source file. +/// Routine that indiscriminately sets the offset into the source file. void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { BufferPtr = BufferStart + Offset; if (BufferPtr > BufferEnd) @@ -1645,20 +1641,38 @@ FinishIdentifier: // Fill in Result.IdentifierInfo and update the token kind, // looking up the identifier in the identifier table. IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + // Note that we have to call PP->LookUpIdentifierInfo() even for code + // completion, it writes IdentifierInfo into Result, and callers rely on it. + + // If the completion point is at the end of an identifier, we want to treat + // the identifier as incomplete even if it resolves to a macro or a keyword. + // This allows e.g. 'class^' to complete to 'classifier'. + if (isCodeCompletionPoint(CurPtr)) { + // Return the code-completion token. + Result.setKind(tok::code_completion); + // Skip the code-completion char and all immediate identifier characters. + // This ensures we get consistent behavior when completing at any point in + // an identifier (i.e. at the start, in the middle, at the end). Note that + // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code + // simpler. + assert(*CurPtr == 0 && "Completion character must be 0"); + ++CurPtr; + // Note that code completion token is not added as a separate character + // when the completion point is at the end of the buffer. Therefore, we need + // to check if the buffer has ended. + if (CurPtr < BufferEnd) { + while (isIdentifierBody(*CurPtr)) + ++CurPtr; + } + BufferPtr = CurPtr; + return true; + } // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); - if (II->getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr) - && II->getPPKeywordID() == tok::pp_not_keyword - && II->getObjCKeywordID() == tok::objc_not_keyword) { - // Return the code-completion token. - Result.setKind(tok::code_completion); - cutOffLexing(); - return true; - } return true; } @@ -2009,18 +2023,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); while (C != '>') { - // Skip escaped characters. - if (C == '\\' && CurPtr < BufferEnd) { - // Skip the escaped character. - getAndAdvanceChar(CurPtr, Result); - } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + // Skip escaped characters. Escaped newlines will already be processed by + // getAndAdvanceChar. + if (C == '\\') + C = getAndAdvanceChar(CurPtr, Result); + + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); return true; - } else if (C == 0) { + } + + if (C == 0) { NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result); @@ -2160,7 +2177,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr, } /// We have just read the // characters from input. Skip until we find the -/// newline character thats terminate the comment. Then update BufferPtr and +/// newline character that terminates the comment. Then update BufferPtr and /// return. /// /// If we're in KeepCommentMode or any CommentHandler has inserted @@ -2738,7 +2755,7 @@ unsigned Lexer::isNextPPTokenLParen() { return Tok.is(tok::l_paren); } -/// \brief Find the end of a version control conflict marker. +/// Find the end of a version control conflict marker. static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK) { const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; @@ -3509,7 +3526,7 @@ LexNextToken: // want to lex this as a comment. There is one problem with this though, // that in one particular corner case, this can change the behavior of the // resultant program. For example, In "foo //**/ bar", C89 would lex - // this as "foo / bar" and langauges with Line comments would lex it as + // this as "foo / bar" and languages with Line comments would lex it as // "foo". Check to see if the character after the second slash is a '*'. // If so, we will lex that as a "/" instead of the start of a comment. // However, we never do this if we are just preprocessing. |