src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-08-22 19:00:43 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-11-13 20:39:49 +0000
commit	fe6060f10f634930ff71b7c50291ddc610da2475 (patch)
tree	1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/clang/lib/Lex/Lexer.cpp
parent	b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff)
parent	344a3780b2e33f6ca763666c380202b18aab72a3 (diff)

Diffstat (limited to 'contrib/llvm-project/clang/lib/Lex/Lexer.cpp')

-rw-r--r--

contrib/llvm-project/clang/lib/Lex/Lexer.cpp

118

1 files changed, 77 insertions, 41 deletions

diff --git a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
index 34732b659771..3034af231e0e 100644
--- a/contrib/llvm-project/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm-project/clang/lib/Lex/Lexer.cpp

@@ -588,7 +588,7 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,

// Create a lexer starting at the beginning of the file. Note that we use a

// "fake" file source location at offset 1 so that the lexer will track our

// position within the file.

- const unsigned StartOffset = 1;

+ const SourceLocation::UIntTy StartOffset = 1;

SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);

Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),

Buffer.end());

@@ -682,6 +682,8 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer,

.Case("ifdef", PDK_Skipped)

.Case("ifndef", PDK_Skipped)

.Case("elif", PDK_Skipped)

+ .Case("elifdef", PDK_Skipped)

+ .Case("elifndef", PDK_Skipped)

.Case("else", PDK_Skipped)

.Case("endif", PDK_Skipped)

.Default(PDK_Unknown);

@@ -875,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,

return CharSourceRange::getCharRange(Begin, End);

}

+// Assumes that `Loc` is in an expansion.

+static bool isInExpansionTokenRange(const SourceLocation Loc,

+ const SourceManager &SM) {

+ return SM.getSLocEntry(SM.getFileID(Loc))

+ .getExpansion()

+ .isExpansionTokenRange();

+}

CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,

const SourceManager &SM,

const LangOptions &LangOpts) {

@@ -894,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,

}

if (Begin.isFileID() && End.isMacroID()) {

- if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,

- &End)) ||

- (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,

- &End)))

+ if (Range.isTokenRange()) {

+ if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))

+ return {};

+ // Use the *original* end, not the expanded one in `End`.

+ Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));

+ } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))

return {};

Range.setEnd(End);

return makeRangeFromFileLocs(Range, SM, LangOpts);

@@ -912,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,

&MacroEnd)))) {

Range.setBegin(MacroBegin);

Range.setEnd(MacroEnd);

+ // Use the *original* `End`, not the expanded one in `MacroEnd`.

+ if (Range.isTokenRange())

+ Range.setTokenRange(isInExpansionTokenRange(End, SM));

return makeRangeFromFileLocs(Range, SM, LangOpts);

}

@@ -1788,12 +1803,14 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {

}

// If we have a digit separator, continue.

- if (C == '\'' && getLangOpts().CPlusPlus14) {

+ if (C == '\'' && (getLangOpts().CPlusPlus14 || getLangOpts().C2x)) {

unsigned NextSize;

char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());

if (isIdentifierBody(Next)) {

if (!isLexingRawMode())

- Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);

+ Diag(CurPtr, getLangOpts().CPlusPlus

+ ? diag::warn_cxx11_compat_digit_separator

+ : diag::warn_c2x_compat_digit_separator);

CurPtr = ConsumeChar(CurPtr, Size, Result);

CurPtr = ConsumeChar(CurPtr, NextSize, Result);

return LexNumericConstant(Result, CurPtr);

@@ -2059,7 +2076,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {

if (C == '\\')

C = getAndAdvanceChar(CurPtr, Result);

- if (C == '\n' || C == '\r' || // Newline.

+ if (isVerticalWhitespace(C) || // Newline.

(C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file.

// If the filename is unterminated, then it must just be a lone <

// character. Return this as such.

@@ -2441,56 +2458,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,

Lexer *L) {

assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');

- // Back up off the newline.

- --CurPtr;

+ // Position of the first trigraph in the ending sequence.

+ const char *TrigraphPos = 0;

+ // Position of the first whitespace after a '\' in the ending sequence.

+ const char *SpacePos = 0;

- // If this is a two-character newline sequence, skip the other character.

- if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {

- // \n\n or \r\r -> not escaped newline.

- if (CurPtr[0] == CurPtr[1])

- return false;

- // \n\r or \r\n -> skip the newline.

+ while (true) {

+ // Back up off the newline.

--CurPtr;

- }

- // If we have horizontal whitespace, skip over it. We allow whitespace

- // between the slash and newline.

- bool HasSpace = false;

- while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {

- --CurPtr;

- HasSpace = true;

- }

+ // If this is a two-character newline sequence, skip the other character.

+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {

+ // \n\n or \r\r -> not escaped newline.

+ if (CurPtr[0] == CurPtr[1])

+ return false;

+ // \n\r or \r\n -> skip the newline.

+ --CurPtr;

+ }

- // If we have a slash, we know this is an escaped newline.

- if (*CurPtr == '\\') {

- if (CurPtr[-1] != '*') return false;

- } else {

- // It isn't a slash, is it the ?? / trigraph?

- if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||

- CurPtr[-3] != '*')

+ // If we have horizontal whitespace, skip over it. We allow whitespace

+ // between the slash and newline.

+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {

+ SpacePos = CurPtr;

+ --CurPtr;

+ }

+ // If we have a slash, this is an escaped newline.

+ if (*CurPtr == '\\') {

+ --CurPtr;

+ } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {

+ // This is a trigraph encoding of a slash.

+ TrigraphPos = CurPtr - 2;

+ CurPtr -= 3;

+ } else {

return false;

+ }

- // This is the trigraph ending the comment. Emit a stern warning!

- CurPtr -= 2;

+ // If the character preceding the escaped newline is a '*', then after line

+ // splicing we have a '*/' ending the comment.

+ if (*CurPtr == '*')

+ break;

+ if (*CurPtr != '\n' && *CurPtr != '\r')

+ return false;

+ }

+ if (TrigraphPos) {

// If no trigraphs are enabled, warn that we ignored this trigraph and

// ignore this * character.

if (!L->getLangOpts().Trigraphs) {

if (!L->isLexingRawMode())

- L->Diag(CurPtr, diag::trigraph_ignored_block_comment);

+ L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);

return false;

}

if (!L->isLexingRawMode())

- L->Diag(CurPtr, diag::trigraph_ends_block_comment);

+ L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);

}

// Warn about having an escaped newline between the */ characters.

if (!L->isLexingRawMode())

- L->Diag(CurPtr, diag::escaped_newline_block_comment_end);

+ L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);

// If there was space between the backslash and newline, warn about it.

- if (HasSpace && !L->isLexingRawMode())

- L->Diag(CurPtr, diag::backslash_newline_space);

+ if (SpacePos && !L->isLexingRawMode())

+ L->Diag(SpacePos, diag::backslash_newline_space);

return true;

}

@@ -2762,6 +2793,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {

if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {

PP->setRecordedPreambleConditionalStack(ConditionalStack);

+ // If the preamble cuts off the end of a header guard, consider it guarded.

+ // The guard is valid for the preamble content itself, and for tools the

+ // most useful answer is "yes, this file has a header guard".

+ if (!ConditionalStack.empty())

+ MIOpt.ExitTopLevelConditional();

ConditionalStack.clear();

}

@@ -3206,10 +3242,10 @@ LexNextToken:

const char *CurPtr = BufferPtr;

// Small amounts of horizontal whitespace is very common between tokens.

- if ((*CurPtr == ' ') || (*CurPtr == '\t')) {

- ++CurPtr;

- while ((*CurPtr == ' ') || (*CurPtr == '\t'))

+ if (isHorizontalWhitespace(*CurPtr)) {

+ do {

++CurPtr;

+ } while (isHorizontalWhitespace(*CurPtr));

// If we are keeping whitespace and other tokens, just return what we just

// skipped. The next lexer invocation will return the token after the