diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Lex/Lexer.cpp')
| -rw-r--r-- | contrib/llvm/tools/clang/lib/Lex/Lexer.cpp | 75 | 
1 files changed, 40 insertions, 35 deletions
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp index 5212dd86e7a1..a5ba7dbe0a93 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp @@ -513,10 +513,13 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,    // "fake" file source location at offset 1 so that the lexer will track our    // position within the file.    const unsigned StartOffset = 1; -  SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset); -  Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(), +  SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset); +  Lexer TheLexer(FileLoc, LangOpts, Buffer->getBufferStart(),                   Buffer->getBufferStart(), Buffer->getBufferEnd()); -   + +  // StartLoc will differ from FileLoc if there is a BOM that was skipped. +  SourceLocation StartLoc = TheLexer.getSourceLocation(); +    bool InPreprocessorDirective = false;    Token TheTok;    Token IfStartTok; @@ -1534,7 +1537,7 @@ FinishIdentifier:  /// isHexaLiteral - Return true if Start points to a hex constant.  /// in microsoft mode (where this is supposed to be several different tokens). -static bool isHexaLiteral(const char *Start, const LangOptions &LangOpts) { +bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {    unsigned Size;    char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);    if (C1 != '0') @@ -1813,17 +1816,18 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,    while (C != '\'') {      // Skip escaped characters. -    if (C == '\\') { -      // Skip the escaped character. -      // FIXME: UCN's -      getAndAdvanceChar(CurPtr, Result); -    } else if (C == '\n' || C == '\r' ||             // Newline. -               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file. +    if (C == '\\') +      C = getAndAdvanceChar(CurPtr, Result); + +    if (C == '\n' || C == '\r' ||             // Newline. +        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.        if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)          Diag(BufferPtr, diag::ext_unterminated_char);        FormTokenWithChars(Result, CurPtr-1, tok::unknown);        return; -    } else if (C == 0) { +    } + +    if (C == 0) {        if (isCodeCompletionPoint(CurPtr-1)) {          PP->CodeCompleteNaturalLanguage();          FormTokenWithChars(Result, CurPtr-1, tok::unknown); @@ -1895,21 +1899,21 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {    return false;  } -// SkipBCPLComment - We have just read the // characters from input.  Skip until -// we find the newline character thats terminate the comment.  Then update -/// BufferPtr and return. +/// We have just read the // characters from input.  Skip until we find the +/// newline character thats terminate the comment.  Then update BufferPtr and +/// return.  ///  /// If we're in KeepCommentMode or any CommentHandler has inserted  /// some tokens, this will store the first token and return true. -bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { -  // If BCPL comments aren't explicitly enabled for this language, emit an +bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) { +  // If Line comments aren't explicitly enabled for this language, emit an    // extension warning. -  if (!LangOpts.BCPLComment && !isLexingRawMode()) { -    Diag(BufferPtr, diag::ext_bcpl_comment); +  if (!LangOpts.LineComment && !isLexingRawMode()) { +    Diag(BufferPtr, diag::ext_line_comment);      // Mark them enabled so we only emit one warning for this translation      // unit. -    LangOpts.BCPLComment = true; +    LangOpts.LineComment = true;    }    // Scan over the body of the comment.  The common case, when scanning, is that @@ -1973,7 +1977,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {            }            if (!isLexingRawMode()) -            Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment); +            Diag(OldPtr-1, diag::ext_multi_line_line_comment);            break;          }      } @@ -2002,7 +2006,7 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {    // If we are returning comments as tokens, return this comment as a token.    if (inKeepCommentMode()) -    return SaveBCPLComment(Result, CurPtr); +    return SaveLineComment(Result, CurPtr);    // If we are inside a preprocessor directive and we see the end of line,    // return immediately, so that the lexer can return this as an EOD token. @@ -2026,9 +2030,9 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {    return false;  } -/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in -/// an appropriate way and return it. -bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { +/// If in save-comment mode, package up this Line comment in an appropriate +/// way and return it. +bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {    // If we're not in a preprocessor directive, just return the // comment    // directly.    FormTokenWithChars(Result, CurPtr, tok::comment); @@ -2036,19 +2040,19 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {    if (!ParsingPreprocessorDirective || LexingRawMode)      return true; -  // If this BCPL-style comment is in a macro definition, transmogrify it into +  // If this Line-style comment is in a macro definition, transmogrify it into    // a C-style block comment.    bool Invalid = false;    std::string Spelling = PP->getSpelling(Result, &Invalid);    if (Invalid)      return true; -  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); +  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");    Spelling[1] = '*';   // Change prefix to "/*".    Spelling += "*/";    // add suffix.    Result.setKind(tok::comment); -  PP->CreateString(&Spelling[0], Spelling.size(), Result, +  PP->CreateString(Spelling, Result,                     Result.getLocation(), Result.getLocation());    return true;  } @@ -2179,7 +2183,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {  #ifdef __SSE2__        __m128i Slashes = _mm_set1_epi8('/');        while (CurPtr+16 <= BufferEnd) { -        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)); +        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr, +                                    Slashes));          if (cmp != 0) {            // Adjust the pointer to point directly after the first slash. It's            // not necessary to set C here, it will be overwritten at the end of @@ -2669,8 +2674,8 @@ LexNextToken:      // If the next token is obviously a // or /* */ comment, skip it efficiently      // too (without going through the big switch stmt).      if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && -        LangOpts.BCPLComment && !LangOpts.TraditionalCPP) { -      if (SkipBCPLComment(Result, CurPtr+2)) +        LangOpts.LineComment && !LangOpts.TraditionalCPP) { +      if (SkipLineComment(Result, CurPtr+2))          return; // There is a token to return.        goto SkipIgnoredUnits;      } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { @@ -2955,19 +2960,19 @@ LexNextToken:    case '/':      // 6.4.9: Comments      Char = getCharAndSize(CurPtr, SizeTmp); -    if (Char == '/') {         // BCPL comment. -      // Even if BCPL comments are disabled (e.g. in C89 mode), we generally +    if (Char == '/') {         // Line comment. +      // Even if Line comments are disabled (e.g. in C89 mode), we generally        // want to lex this as a comment.  There is one problem with this though,        // that in one particular corner case, this can change the behavior of the        // resultant program.  For example, In  "foo //**/ bar", C89 would lex -      // this as "foo / bar" and langauges with BCPL comments would lex it as +      // this as "foo / bar" and langauges with Line comments would lex it as        // "foo".  Check to see if the character after the second slash is a '*'.        // If so, we will lex that as a "/" instead of the start of a comment.        // However, we never do this in -traditional-cpp mode. -      if ((LangOpts.BCPLComment || +      if ((LangOpts.LineComment ||             getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&            !LangOpts.TraditionalCPP) { -        if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) +        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))            return; // There is a token to return.          // It is common for the tokens immediately after a // comment to be  | 
