diff options
Diffstat (limited to 'lib/Lex/Lexer.cpp')
| -rw-r--r-- | lib/Lex/Lexer.cpp | 82 | 
1 files changed, 46 insertions, 36 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 9c2a0163acead..6025a66751254 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -14,18 +14,27 @@  #include "clang/Lex/Lexer.h"  #include "UnicodeCharSets.h"  #include "clang/Basic/CharInfo.h" +#include "clang/Basic/IdentifierTable.h"  #include "clang/Basic/SourceManager.h" -#include "clang/Lex/CodeCompletionHandler.h"  #include "clang/Lex/LexDiagnostic.h"  #include "clang/Lex/LiteralSupport.h"  #include "clang/Lex/Preprocessor.h" -#include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/StringSwitch.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/MathExtras.h"  #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/UnicodeCharRanges.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint>  #include <cstring> +#include <string> +#include <tuple> +#include <utility> +  using namespace clang;  //===----------------------------------------------------------------------===// @@ -45,7 +54,6 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {    return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;  } -  //===----------------------------------------------------------------------===//  // Lexer Class Implementation  //===----------------------------------------------------------------------===// @@ -196,7 +204,6 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,    return L;  } -  /// Stringify - Convert the specified string into a C string, with surrounding  /// ""'s, and with escaped \ and " characters.  std::string Lexer::Stringify(StringRef Str, bool Charify) { @@ -398,7 +405,6 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,    return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));  } -  /// MeasureTokenLength - Relex the token at the specified location and return  /// its length in bytes in the input file.  If the token needs cleaning (e.g.  /// includes a trigraph or an escaped newline) then this count includes bytes @@ -526,13 +532,15 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,  }  namespace { +    enum PreambleDirectiveKind {      PDK_Skipped,      PDK_StartIf,      PDK_EndIf,      PDK_Unknown    }; -} + +} // end anonymous namespace  std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,                                                   const LangOptions &LangOpts, @@ -694,7 +702,6 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,                                 : TheTok.isAtStartOfLine());  } -  /// AdvanceToTokenCharacter - Given a location that specifies the start of a  /// token, return a new location that specifies a character within the token.  SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, @@ -961,7 +968,7 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,    assert(Loc.isMacroID() && "Only reasonble to call this on macros");    // Find the location of the immediate macro expansion. -  while (1) { +  while (true) {      FileID FID = SM.getFileID(Loc);      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); @@ -1031,7 +1038,6 @@ bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {    return isIdentifierBody(c, LangOpts.DollarIdents);  } -  //===----------------------------------------------------------------------===//  // Diagnostics forwarding code.  //===----------------------------------------------------------------------===// @@ -1157,7 +1163,7 @@ unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {  /// them), skip over them and return the first non-escaped-newline found,  /// otherwise return P.  const char *Lexer::SkipEscapedNewLines(const char *P) { -  while (1) { +  while (true) {      const char *AfterEscape;      if (*P == '\\') {        AfterEscape = P+1; @@ -1310,7 +1316,6 @@ Slash:    return *Ptr;  } -  /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the  /// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,  /// and that we have already incremented Ptr by Size bytes. @@ -1480,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,  bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {    const char *UnicodePtr = CurPtr; -  UTF32 CodePoint; -  ConversionResult Result = -      llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, -                                (const UTF8 *)BufferEnd, +  llvm::UTF32 CodePoint; +  llvm::ConversionResult Result = +      llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr, +                                (const llvm::UTF8 *)BufferEnd,                                  &CodePoint, -                                strictConversion); -  if (Result != conversionOK || +                                llvm::strictConversion); +  if (Result != llvm::conversionOK ||        !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))      return false; @@ -1533,14 +1538,22 @@ FinishIdentifier:      // preprocessor, which may macro expand it or something.      if (II->isHandleIdentifierCase())        return PP->HandleIdentifier(Result); -     + +    if (II->getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr) +        && II->getPPKeywordID() == tok::pp_not_keyword +        && II->getObjCKeywordID() == tok::objc_not_keyword) { +      // Return the code-completion token. +      Result.setKind(tok::code_completion); +      cutOffLexing(); +      return true; +    }      return true;    }    // Otherwise, $,\,? in identifier found.  Enter slower path.    C = getCharAndSize(CurPtr, Size); -  while (1) { +  while (true) {      if (C == '$') {        // If we hit a $ and they are not supported in identifiers, we are done.        if (!LangOpts.DollarIdents) goto FinishIdentifier; @@ -1700,9 +1713,9 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,                                           getLangOpts());          if (!isIdentifierBody(Next)) {            // End of suffix. Check whether this is on the whitelist. -          IsUDSuffix = (Chars == 1 && Buffer[0] == 's') || -                       NumericLiteralParser::isValidUDSuffix( -                           getLangOpts(), StringRef(Buffer, Chars)); +          const StringRef CompleteSuffix(Buffer, Chars); +          IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(), +                                                            CompleteSuffix);            break;          } @@ -1829,7 +1842,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,      // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,      // it's possible the '"' was intended to be part of the raw string, but      // there's not much we can do about that. -    while (1) { +    while (true) {        char C = *CurPtr++;        if (C == '"') @@ -1848,7 +1861,7 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,    const char *Prefix = CurPtr;    CurPtr += PrefixLen + 1; // skip over prefix and '(' -  while (1) { +  while (true) {      char C = *CurPtr++;      if (C == ')') { @@ -1913,7 +1926,6 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {    return true;  } -  /// LexCharConstant - Lex the remainder of a character constant, after having  /// lexed either ' or L' or u8' or u' or U'.  bool Lexer::LexCharConstant(Token &Result, const char *CurPtr, @@ -1992,7 +2004,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,    unsigned char Char = *CurPtr;    // Skip consecutive spaces efficiently. -  while (1) { +  while (true) {      // Skip horizontal whitespace very aggressively.      while (isHorizontalWhitespace(Char))        Char = *++CurPtr; @@ -2315,7 +2327,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,    if (C == '/')      C = *CurPtr++; -  while (1) { +  while (true) {      // Skip over all non-interesting characters until we find end of buffer or a      // (probably ending) '/' character.      if (CurPtr + 24 < BufferEnd && @@ -2456,7 +2468,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {    // CurPtr - Cache BufferPtr in an automatic variable.    const char *CurPtr = BufferPtr; -  while (1) { +  while (true) {      char Char = getAndAdvanceChar(CurPtr, Tmp);      switch (Char) {      default: @@ -2669,7 +2681,6 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {    return false;  } -  /// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if  /// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it  /// is the end of a conflict marker.  Handle it by ignoring up until the end of @@ -3498,7 +3509,6 @@ LexNextToken:          CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);          Kind = tok::greatergreater;        } -            } else {        Kind = tok::greater;      } @@ -3615,17 +3625,17 @@ LexNextToken:        break;      } -    UTF32 CodePoint; +    llvm::UTF32 CodePoint;      // We can't just reset CurPtr to BufferPtr because BufferPtr may point to      // an escaped newline.      --CurPtr; -    ConversionResult Status = -        llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, -                                  (const UTF8 *)BufferEnd, +    llvm::ConversionResult Status = +        llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, +                                  (const llvm::UTF8 *)BufferEnd,                                    &CodePoint, -                                  strictConversion); -    if (Status == conversionOK) { +                                  llvm::strictConversion); +    if (Status == llvm::conversionOK) {        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))            return true; // KeepWhitespaceMode  | 
