aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2021-08-22 19:00:43 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2021-11-13 20:39:49 +0000
commit fe6060f10f634930ff71b7c50291ddc610da2475 (patch)
tree 1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/clang/lib/Lex/Lexer.cpp
parent b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff)
parent 344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Diffstat (limited to 'contrib/llvm-project/clang/lib/Lex/Lexer.cpp')
-rw-r--r-- contrib/llvm-project/clang/lib/Lex/Lexer.cpp 118
1 files changed, 77 insertions, 41 deletions
diff --git a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
index 34732b659771..3034af231e0e 100644
--- a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
@@ -588,7 +588,7 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
- const unsigned StartOffset = 1;
+ const SourceLocation::UIntTy StartOffset = 1;
SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);
Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
Buffer.end());
@@ -682,6 +682,8 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
.Case("ifdef", PDK_Skipped)
.Case("ifndef", PDK_Skipped)
.Case("elif", PDK_Skipped)
+ .Case("elifdef", PDK_Skipped)
+ .Case("elifndef", PDK_Skipped)
.Case("else", PDK_Skipped)
.Case("endif", PDK_Skipped)
.Default(PDK_Unknown);
@@ -875,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
return CharSourceRange::getCharRange(Begin, End);
}
+// Assumes that `Loc` is in an expansion.
+static bool isInExpansionTokenRange(const SourceLocation Loc,
+ const SourceManager &SM) {
+ return SM.getSLocEntry(SM.getFileID(Loc))
+ .getExpansion()
+ .isExpansionTokenRange();
+}
+
CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
const SourceManager &SM,
const LangOptions &LangOpts) {
@@ -894,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
}
if (Begin.isFileID() && End.isMacroID()) {
- if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
- &End)) ||
- (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
- &End)))
+ if (Range.isTokenRange()) {
+ if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))
+ return {};
+ // Use the *original* end, not the expanded one in `End`.
+ Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));
+ } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))
return {};
Range.setEnd(End);
return makeRangeFromFileLocs(Range, SM, LangOpts);
@@ -912,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
&MacroEnd)))) {
Range.setBegin(MacroBegin);
Range.setEnd(MacroEnd);
+ // Use the *original* `End`, not the expanded one in `MacroEnd`.
+ if (Range.isTokenRange())
+ Range.setTokenRange(isInExpansionTokenRange(End, SM));
return makeRangeFromFileLocs(Range, SM, LangOpts);
}
@@ -1788,12 +1803,14 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
// If we have a digit separator, continue.
- if (C == '\'' && getLangOpts().CPlusPlus14) {
+ if (C == '\'' && (getLangOpts().CPlusPlus14 || getLangOpts().C2x)) {
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());
if (isIdentifierBody(Next)) {
if (!isLexingRawMode())
- Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
+ Diag(CurPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx11_compat_digit_separator
+ : diag::warn_c2x_compat_digit_separator);
CurPtr = ConsumeChar(CurPtr, Size, Result);
CurPtr = ConsumeChar(CurPtr, NextSize, Result);
return LexNumericConstant(Result, CurPtr);
@@ -2059,7 +2076,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
if (C == '\\')
C = getAndAdvanceChar(CurPtr, Result);
- if (C == '\n' || C == '\r' || // Newline.
+ if (isVerticalWhitespace(C) || // Newline.
(C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file.
// If the filename is unterminated, then it must just be a lone <
// character. Return this as such.
@@ -2441,56 +2458,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
Lexer *L) {
assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
- // Back up off the newline.
- --CurPtr;
+ // Position of the first trigraph in the ending sequence.
+ const char *TrigraphPos = 0;
+ // Position of the first whitespace after a '\' in the ending sequence.
+ const char *SpacePos = 0;
- // If this is a two-character newline sequence, skip the other character.
- if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
- // \n\n or \r\r -> not escaped newline.
- if (CurPtr[0] == CurPtr[1])
- return false;
- // \n\r or \r\n -> skip the newline.
+ while (true) {
+ // Back up off the newline.
--CurPtr;
- }
- // If we have horizontal whitespace, skip over it. We allow whitespace
- // between the slash and newline.
- bool HasSpace = false;
- while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
- --CurPtr;
- HasSpace = true;
- }
+ // If this is a two-character newline sequence, skip the other character.
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+ // \n\n or \r\r -> not escaped newline.
+ if (CurPtr[0] == CurPtr[1])
+ return false;
+ // \n\r or \r\n -> skip the newline.
+ --CurPtr;
+ }
- // If we have a slash, we know this is an escaped newline.
- if (*CurPtr == '\\') {
- if (CurPtr[-1] != '*') return false;
- } else {
- // It isn't a slash, is it the ?? / trigraph?
- if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
- CurPtr[-3] != '*')
+ // If we have horizontal whitespace, skip over it. We allow whitespace
+ // between the slash and newline.
+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+ SpacePos = CurPtr;
+ --CurPtr;
+ }
+
+ // If we have a slash, this is an escaped newline.
+ if (*CurPtr == '\\') {
+ --CurPtr;
+ } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {
+ // This is a trigraph encoding of a slash.
+ TrigraphPos = CurPtr - 2;
+ CurPtr -= 3;
+ } else {
return false;
+ }
- // This is the trigraph ending the comment. Emit a stern warning!
- CurPtr -= 2;
+ // If the character preceding the escaped newline is a '*', then after line
+ // splicing we have a '*/' ending the comment.
+ if (*CurPtr == '*')
+ break;
+ if (*CurPtr != '\n' && *CurPtr != '\r')
+ return false;
+ }
+
+ if (TrigraphPos) {
// If no trigraphs are enabled, warn that we ignored this trigraph and
// ignore this * character.
if (!L->getLangOpts().Trigraphs) {
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
return false;
}
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
}
// Warn about having an escaped newline between the */ characters.
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+ L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
// If there was space between the backslash and newline, warn about it.
- if (HasSpace && !L->isLexingRawMode())
- L->Diag(CurPtr, diag::backslash_newline_space);
+ if (SpacePos && !L->isLexingRawMode())
+ L->Diag(SpacePos, diag::backslash_newline_space);
return true;
}
@@ -2762,6 +2793,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
PP->setRecordedPreambleConditionalStack(ConditionalStack);
+ // If the preamble cuts off the end of a header guard, consider it guarded.
+ // The guard is valid for the preamble content itself, and for tools the
+ // most useful answer is "yes, this file has a header guard".
+ if (!ConditionalStack.empty())
+ MIOpt.ExitTopLevelConditional();
ConditionalStack.clear();
}
@@ -3206,10 +3242,10 @@ LexNextToken:
const char *CurPtr = BufferPtr;
// Small amounts of horizontal whitespace is very common between tokens.
- if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
- ++CurPtr;
- while ((*CurPtr == ' ') || (*CurPtr == '\t'))
+ if (isHorizontalWhitespace(*CurPtr)) {
+ do {
++CurPtr;
+ } while (isHorizontalWhitespace(*CurPtr));
// If we are keeping whitespace and other tokens, just return what we just
// skipped. The next lexer invocation will return the token after the