diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-08-22 19:00:43 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-13 20:39:49 +0000 |
commit | fe6060f10f634930ff71b7c50291ddc610da2475 (patch) | |
tree | 1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/clang/lib/Lex/Lexer.cpp | |
parent | b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff) | |
parent | 344a3780b2e33f6ca763666c380202b18aab72a3 (diff) |
Diffstat (limited to 'contrib/llvm-project/clang/lib/Lex/Lexer.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/Lex/Lexer.cpp | 118 |
1 files changed, 77 insertions, 41 deletions
diff --git a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp index 34732b659771..3034af231e0e 100644 --- a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp +++ b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp @@ -588,7 +588,7 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, // Create a lexer starting at the beginning of the file. Note that we use a // "fake" file source location at offset 1 so that the lexer will track our // position within the file. - const unsigned StartOffset = 1; + const SourceLocation::UIntTy StartOffset = 1; SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset); Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(), Buffer.end()); @@ -682,6 +682,8 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, .Case("ifdef", PDK_Skipped) .Case("ifndef", PDK_Skipped) .Case("elif", PDK_Skipped) + .Case("elifdef", PDK_Skipped) + .Case("elifndef", PDK_Skipped) .Case("else", PDK_Skipped) .Case("endif", PDK_Skipped) .Default(PDK_Unknown); @@ -875,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, return CharSourceRange::getCharRange(Begin, End); } +// Assumes that `Loc` is in an expansion. +static bool isInExpansionTokenRange(const SourceLocation Loc, + const SourceManager &SM) { + return SM.getSLocEntry(SM.getFileID(Loc)) + .getExpansion() + .isExpansionTokenRange(); +} + CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) { @@ -894,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, } if (Begin.isFileID() && End.isMacroID()) { - if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts, - &End)) || - (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, - &End))) + if (Range.isTokenRange()) { + if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End)) + return {}; + // Use the *original* end, not the expanded one in `End`. + Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM)); + } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End)) return {}; Range.setEnd(End); return makeRangeFromFileLocs(Range, SM, LangOpts); @@ -912,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, &MacroEnd)))) { Range.setBegin(MacroBegin); Range.setEnd(MacroEnd); + // Use the *original* `End`, not the expanded one in `MacroEnd`. + if (Range.isTokenRange()) + Range.setTokenRange(isInExpansionTokenRange(End, SM)); return makeRangeFromFileLocs(Range, SM, LangOpts); } @@ -1788,12 +1803,14 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { } // If we have a digit separator, continue. - if (C == '\'' && getLangOpts().CPlusPlus14) { + if (C == '\'' && (getLangOpts().CPlusPlus14 || getLangOpts().C2x)) { unsigned NextSize; char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts()); if (isIdentifierBody(Next)) { if (!isLexingRawMode()) - Diag(CurPtr, diag::warn_cxx11_compat_digit_separator); + Diag(CurPtr, getLangOpts().CPlusPlus + ? diag::warn_cxx11_compat_digit_separator + : diag::warn_c2x_compat_digit_separator); CurPtr = ConsumeChar(CurPtr, Size, Result); CurPtr = ConsumeChar(CurPtr, NextSize, Result); return LexNumericConstant(Result, CurPtr); @@ -2059,7 +2076,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { if (C == '\\') C = getAndAdvanceChar(CurPtr, Result); - if (C == '\n' || C == '\r' || // Newline. + if (isVerticalWhitespace(C) || // Newline. (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file. // If the filename is unterminated, then it must just be a lone < // character. Return this as such. @@ -2441,56 +2458,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L) { assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); - // Back up off the newline. - --CurPtr; + // Position of the first trigraph in the ending sequence. + const char *TrigraphPos = 0; + // Position of the first whitespace after a '\' in the ending sequence. + const char *SpacePos = 0; - // If this is a two-character newline sequence, skip the other character. - if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { - // \n\n or \r\r -> not escaped newline. - if (CurPtr[0] == CurPtr[1]) - return false; - // \n\r or \r\n -> skip the newline. + while (true) { + // Back up off the newline. --CurPtr; - } - // If we have horizontal whitespace, skip over it. We allow whitespace - // between the slash and newline. - bool HasSpace = false; - while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { - --CurPtr; - HasSpace = true; - } + // If this is a two-character newline sequence, skip the other character. + if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { + // \n\n or \r\r -> not escaped newline. + if (CurPtr[0] == CurPtr[1]) + return false; + // \n\r or \r\n -> skip the newline. + --CurPtr; + } - // If we have a slash, we know this is an escaped newline. - if (*CurPtr == '\\') { - if (CurPtr[-1] != '*') return false; - } else { - // It isn't a slash, is it the ?? / trigraph? - if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || - CurPtr[-3] != '*') + // If we have horizontal whitespace, skip over it. We allow whitespace + // between the slash and newline. + while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { + SpacePos = CurPtr; + --CurPtr; + } + + // If we have a slash, this is an escaped newline. + if (*CurPtr == '\\') { + --CurPtr; + } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') { + // This is a trigraph encoding of a slash. + TrigraphPos = CurPtr - 2; + CurPtr -= 3; + } else { return false; + } - // This is the trigraph ending the comment. Emit a stern warning! - CurPtr -= 2; + // If the character preceding the escaped newline is a '*', then after line + // splicing we have a '*/' ending the comment. + if (*CurPtr == '*') + break; + if (*CurPtr != '\n' && *CurPtr != '\r') + return false; + } + + if (TrigraphPos) { // If no trigraphs are enabled, warn that we ignored this trigraph and // ignore this * character. if (!L->getLangOpts().Trigraphs) { if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::trigraph_ignored_block_comment); + L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment); return false; } if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::trigraph_ends_block_comment); + L->Diag(TrigraphPos, diag::trigraph_ends_block_comment); } // Warn about having an escaped newline between the */ characters. if (!L->isLexingRawMode()) - L->Diag(CurPtr, diag::escaped_newline_block_comment_end); + L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end); // If there was space between the backslash and newline, warn about it. - if (HasSpace && !L->isLexingRawMode()) - L->Diag(CurPtr, diag::backslash_newline_space); + if (SpacePos && !L->isLexingRawMode()) + L->Diag(SpacePos, diag::backslash_newline_space); return true; } @@ -2762,6 +2793,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) { PP->setRecordedPreambleConditionalStack(ConditionalStack); + // If the preamble cuts off the end of a header guard, consider it guarded. + // The guard is valid for the preamble content itself, and for tools the + // most useful answer is "yes, this file has a header guard". + if (!ConditionalStack.empty()) + MIOpt.ExitTopLevelConditional(); ConditionalStack.clear(); } @@ -3206,10 +3242,10 @@ LexNextToken: const char *CurPtr = BufferPtr; // Small amounts of horizontal whitespace is very common between tokens. - if ((*CurPtr == ' ') || (*CurPtr == '\t')) { - ++CurPtr; - while ((*CurPtr == ' ') || (*CurPtr == '\t')) + if (isHorizontalWhitespace(*CurPtr)) { + do { ++CurPtr; + } while (isHorizontalWhitespace(*CurPtr)); // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the |