aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Format/FormatTokenLexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Format/FormatTokenLexer.cpp')
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp184
1 files changed, 148 insertions, 36 deletions
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 3f9b68ccbb39..f8f5f7112188 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -193,6 +193,78 @@ void FormatTokenLexer::tryMergePreviousTokens() {
if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
return;
}
+
+ if (Style.isVerilog()) {
+ // Merge the number following a base like `'h?a0`.
+ if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
+ Tokens.end()[-2]->is(tok::numeric_constant) &&
+ Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
+ tok::question) &&
+ tryMergeTokens(2, TT_Unknown)) {
+ return;
+ }
+ // Part select.
+ if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
+ TT_BitFieldColon)) {
+ return;
+ }
+ // Xnor. The combined token is treated as a caret which can also be either a
+ // unary or binary operator. The actual type is determined in
+ // TokenAnnotator. We also check the token length so we know it is not
+ // already a merged token.
+ if (Tokens.back()->TokenText.size() == 1 &&
+ tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
+ TT_BinaryOperator)) {
+ Tokens.back()->Tok.setKind(tok::caret);
+ return;
+ }
+ // Signed shift and distribution weight.
+ if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
+ Tokens.back()->Tok.setKind(tok::lessless);
+ return;
+ }
+ if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
+ Tokens.back()->Tok.setKind(tok::greatergreater);
+ return;
+ }
+ if (tryMergeTokensAny({{tok::lessless, tok::equal},
+ {tok::lessless, tok::lessequal},
+ {tok::greatergreater, tok::equal},
+ {tok::greatergreater, tok::greaterequal},
+ {tok::colon, tok::equal},
+ {tok::colon, tok::slash}},
+ TT_BinaryOperator)) {
+ Tokens.back()->ForcedPrecedence = prec::Assignment;
+ return;
+ }
+ // Exponentiation, signed shift, case equality, and wildcard equality.
+ if (tryMergeTokensAny({{tok::star, tok::star},
+ {tok::lessless, tok::less},
+ {tok::greatergreater, tok::greater},
+ {tok::exclaimequal, tok::equal},
+ {tok::exclaimequal, tok::question},
+ {tok::equalequal, tok::equal},
+ {tok::equalequal, tok::question}},
+ TT_BinaryOperator)) {
+ return;
+ }
+ // Module paths in specify blocks and implications in properties.
+ if (tryMergeTokensAny({{tok::plusequal, tok::greater},
+ {tok::plus, tok::star, tok::greater},
+ {tok::minusequal, tok::greater},
+ {tok::minus, tok::star, tok::greater},
+ {tok::less, tok::arrow},
+ {tok::equal, tok::greater},
+ {tok::star, tok::greater},
+ {tok::pipeequal, tok::greater},
+ {tok::pipe, tok::arrow},
+ {tok::hash, tok::minus, tok::hash},
+ {tok::hash, tok::equal, tok::hash}},
+ TT_BinaryOperator)) {
+ Tokens.back()->ForcedPrecedence = prec::Comma;
+ return;
+ }
+ }
}
bool FormatTokenLexer::tryMergeNSStringLiteral() {
@@ -240,36 +312,32 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
return false;
// Look for @"aaaaaa" or $"aaaaaa".
- auto &String = *(Tokens.end() - 1);
- if (!String->is(tok::string_literal))
+ const auto String = *(Tokens.end() - 1);
+ if (String->isNot(tok::string_literal))
return false;
- auto &At = *(Tokens.end() - 2);
- if (!(At->is(tok::at) || At->TokenText == "$"))
+ auto Prefix = *(Tokens.end() - 2);
+ if (Prefix->isNot(tok::at) && Prefix->TokenText != "$")
return false;
- if (Tokens.size() > 2 && At->is(tok::at)) {
- auto &Dollar = *(Tokens.end() - 3);
- if (Dollar->TokenText == "$") {
- // This looks like $@"aaaaa" so we need to combine all 3 tokens.
- Dollar->Tok.setKind(tok::string_literal);
- Dollar->TokenText =
- StringRef(Dollar->TokenText.begin(),
- String->TokenText.end() - Dollar->TokenText.begin());
- Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth);
- Dollar->setType(TT_CSharpStringLiteral);
+ if (Tokens.size() > 2) {
+ const auto Tok = *(Tokens.end() - 3);
+ if ((Tok->TokenText == "$" && Prefix->is(tok::at)) ||
+ (Tok->is(tok::at) && Prefix->TokenText == "$")) {
+ // This looks like $@"aaa" or @$"aaa" so we need to combine all 3 tokens.
+ Tok->ColumnWidth += Prefix->ColumnWidth;
Tokens.erase(Tokens.end() - 2);
- Tokens.erase(Tokens.end() - 1);
- return true;
+ Prefix = Tok;
}
}
// Convert back into just a string_literal.
- At->Tok.setKind(tok::string_literal);
- At->TokenText = StringRef(At->TokenText.begin(),
- String->TokenText.end() - At->TokenText.begin());
- At->ColumnWidth += String->ColumnWidth;
- At->setType(TT_CSharpStringLiteral);
+ Prefix->Tok.setKind(tok::string_literal);
+ Prefix->TokenText =
+ StringRef(Prefix->TokenText.begin(),
+ String->TokenText.end() - Prefix->TokenText.begin());
+ Prefix->ColumnWidth += String->ColumnWidth;
+ Prefix->setType(TT_CSharpStringLiteral);
Tokens.erase(Tokens.end() - 1);
return true;
}
@@ -303,9 +371,11 @@ bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
if (Tokens.size() < 2)
return false;
- auto &At = *(Tokens.end() - 2);
- auto &Keyword = *(Tokens.end() - 1);
- if (!At->is(tok::at))
+ const auto At = *(Tokens.end() - 2);
+ if (At->isNot(tok::at))
+ return false;
+ const auto Keyword = *(Tokens.end() - 1);
+ if (Keyword->TokenText == "$")
return false;
if (!Keywords.isCSharpKeyword(*Keyword))
return false;
@@ -412,15 +482,28 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
SmallVectorImpl<FormatToken *>::const_iterator First =
Tokens.end() - Kinds.size();
- if (!First[0]->is(Kinds[0]))
+ for (unsigned i = 0; i < Kinds.size(); ++i)
+ if (!First[i]->is(Kinds[i]))
+ return false;
+
+ return tryMergeTokens(Kinds.size(), NewType);
+}
+
+bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {
+ if (Tokens.size() < Count)
return false;
+
+ SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Count;
unsigned AddLength = 0;
- for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore())
+ for (size_t i = 1; i < Count; ++i) {
+ // If there is whitespace separating the token and the previous one,
+ // they should not be merged.
+ if (First[i]->hasWhitespaceBefore())
return false;
AddLength += First[i]->TokenText.size();
}
- Tokens.resize(Tokens.size() - Kinds.size() + 1);
+
+ Tokens.resize(Tokens.size() - Count + 1);
First[0]->TokenText = StringRef(First[0]->TokenText.data(),
First[0]->TokenText.size() + AddLength);
First[0]->ColumnWidth += AddLength;
@@ -428,6 +511,13 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
return true;
}
+bool FormatTokenLexer::tryMergeTokensAny(
+ ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) {
+ return llvm::any_of(Kinds, [this, NewType](ArrayRef<tok::TokenKind> Kinds) {
+ return tryMergeTokens(Kinds, NewType);
+ });
+}
+
// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
// NB: This is not entirely correct, as an r_paren can introduce an operand
@@ -591,7 +681,7 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
bool Verbatim = false;
bool Interpolated = false;
- if (TokenText.startswith(R"($@")")) {
+ if (TokenText.startswith(R"($@")") || TokenText.startswith(R"(@$")")) {
Verbatim = true;
Interpolated = true;
} else if (TokenText.startswith(R"(@")")) {
@@ -670,6 +760,7 @@ void FormatTokenLexer::handleTemplateStrings() {
for (; Offset != Lex->getBuffer().end(); ++Offset) {
if (Offset[0] == '`') {
StateStack.pop();
+ ++Offset;
break;
}
if (Offset[0] == '\\') {
@@ -678,12 +769,12 @@ void FormatTokenLexer::handleTemplateStrings() {
Offset[1] == '{') {
// '${' introduces an expression interpolation in the template string.
StateStack.push(LexerState::NORMAL);
- ++Offset;
+ Offset += 2;
break;
}
}
- StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
+ StringRef LiteralText(TmplBegin, Offset - TmplBegin);
BacktickToken->setType(TT_TemplateString);
BacktickToken->Tok.setKind(tok::string_literal);
BacktickToken->TokenText = LiteralText;
@@ -704,9 +795,7 @@ void FormatTokenLexer::handleTemplateStrings() {
StartColumn, Style.TabWidth, Encoding);
}
- SourceLocation loc = Offset < Lex->getBuffer().end()
- ? Lex->getSourceLocation(Offset + 1)
- : SourceMgr.getLocForEndOfFile(ID);
+ SourceLocation loc = Lex->getSourceLocation(Offset);
resetLexer(SourceMgr.getFileOffset(loc));
}
@@ -935,7 +1024,7 @@ FormatToken *FormatTokenLexer::getNextToken() {
// the same as a single LF.
if (i + 1 < e && Text[i + 1] == '\n')
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case '\n':
++FormatTok->NewlinesBefore;
if (!InEscape)
@@ -1004,12 +1093,19 @@ FormatToken *FormatTokenLexer::getNextToken() {
}
if (Style.isVerilog()) {
+ static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);
+ SmallVector<StringRef, 1> Matches;
// Verilog uses the backtick instead of the hash for preprocessor stuff.
// And it uses the hash for delays and parameter lists. In order to continue
// using `tok::hash` in other places, the backtick gets marked as the hash
// here. And in order to tell the backtick and hash apart for
// Verilog-specific stuff, the hash becomes an identifier.
- if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
+ if (FormatTok->is(tok::numeric_constant)) {
+ // In Verilog the quote is not part of a number.
+ auto Quote = FormatTok->TokenText.find('\'');
+ if (Quote != StringRef::npos)
+ truncateToken(Quote);
+ } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
FormatTok->Tok.setKind(tok::raw_identifier);
} else if (FormatTok->is(tok::raw_identifier)) {
if (FormatTok->TokenText == "`") {
@@ -1018,6 +1114,15 @@ FormatToken *FormatTokenLexer::getNextToken() {
} else if (FormatTok->TokenText == "``") {
FormatTok->Tok.setIdentifierInfo(nullptr);
FormatTok->Tok.setKind(tok::hashhash);
+ } else if (Tokens.size() > 0 &&
+ Tokens.back()->is(Keywords.kw_apostrophe) &&
+ NumberBase.match(FormatTok->TokenText, &Matches)) {
+ // In Verilog in a based number literal like `'b10`, there may be
+ // whitespace between `'b` and `10`. Therefore we handle the base and
+ // the rest of the number literal as two tokens. But if there is no
+ // space in the input code, we need to manually separate the two parts.
+ truncateToken(Matches[0].size());
+ FormatTok->setFinalizedType(TT_VerilogNumberBase);
}
}
}
@@ -1060,6 +1165,13 @@ FormatToken *FormatTokenLexer::getNextToken() {
StateStack.push(LexerState::TOKEN_STASHED);
}
+ if (Style.isVerilog() && Tokens.size() > 0 &&
+ Tokens.back()->is(TT_VerilogNumberBase) &&
+ FormatTok->Tok.isOneOf(tok::identifier, tok::question)) {
+ // Mark the number following a base like `'h?a0` as a number.
+ FormatTok->Tok.setKind(tok::numeric_constant);
+ }
+
// Now FormatTok is the next non-whitespace token.
StringRef Text = FormatTok->TokenText;