diff options
Diffstat (limited to 'clang/lib/Format/FormatTokenLexer.cpp')
-rw-r--r-- | clang/lib/Format/FormatTokenLexer.cpp | 184 |
1 files changed, 148 insertions, 36 deletions
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 3f9b68ccbb39..f8f5f7112188 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -193,6 +193,78 @@ void FormatTokenLexer::tryMergePreviousTokens() { if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) return; } + + if (Style.isVerilog()) { + // Merge the number following a base like `'h?a0`. + if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) && + Tokens.end()[-2]->is(tok::numeric_constant) && + Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier, + tok::question) && + tryMergeTokens(2, TT_Unknown)) { + return; + } + // Part select. + if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}}, + TT_BitFieldColon)) { + return; + } + // Xnor. The combined token is treated as a caret which can also be either a + // unary or binary operator. The actual type is determined in + // TokenAnnotator. We also check the token length so we know it is not + // already a merged token. + if (Tokens.back()->TokenText.size() == 1 && + tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}}, + TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::caret); + return; + } + // Signed shift and distribution weight. + if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::lessless); + return; + } + if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::greatergreater); + return; + } + if (tryMergeTokensAny({{tok::lessless, tok::equal}, + {tok::lessless, tok::lessequal}, + {tok::greatergreater, tok::equal}, + {tok::greatergreater, tok::greaterequal}, + {tok::colon, tok::equal}, + {tok::colon, tok::slash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Assignment; + return; + } + // Exponentiation, signed shift, case equality, and wildcard equality. + if (tryMergeTokensAny({{tok::star, tok::star}, + {tok::lessless, tok::less}, + {tok::greatergreater, tok::greater}, + {tok::exclaimequal, tok::equal}, + {tok::exclaimequal, tok::question}, + {tok::equalequal, tok::equal}, + {tok::equalequal, tok::question}}, + TT_BinaryOperator)) { + return; + } + // Module paths in specify blocks and implications in properties. + if (tryMergeTokensAny({{tok::plusequal, tok::greater}, + {tok::plus, tok::star, tok::greater}, + {tok::minusequal, tok::greater}, + {tok::minus, tok::star, tok::greater}, + {tok::less, tok::arrow}, + {tok::equal, tok::greater}, + {tok::star, tok::greater}, + {tok::pipeequal, tok::greater}, + {tok::pipe, tok::arrow}, + {tok::hash, tok::minus, tok::hash}, + {tok::hash, tok::equal, tok::hash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Comma; + return; + } + } } bool FormatTokenLexer::tryMergeNSStringLiteral() { @@ -240,36 +312,32 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() { return false; // Look for @"aaaaaa" or $"aaaaaa". - auto &String = *(Tokens.end() - 1); - if (!String->is(tok::string_literal)) + const auto String = *(Tokens.end() - 1); + if (String->isNot(tok::string_literal)) return false; - auto &At = *(Tokens.end() - 2); - if (!(At->is(tok::at) || At->TokenText == "$")) + auto Prefix = *(Tokens.end() - 2); + if (Prefix->isNot(tok::at) && Prefix->TokenText != "$") return false; - if (Tokens.size() > 2 && At->is(tok::at)) { - auto &Dollar = *(Tokens.end() - 3); - if (Dollar->TokenText == "$") { - // This looks like $@"aaaaa" so we need to combine all 3 tokens. - Dollar->Tok.setKind(tok::string_literal); - Dollar->TokenText = - StringRef(Dollar->TokenText.begin(), - String->TokenText.end() - Dollar->TokenText.begin()); - Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth); - Dollar->setType(TT_CSharpStringLiteral); + if (Tokens.size() > 2) { + const auto Tok = *(Tokens.end() - 3); + if ((Tok->TokenText == "$" && Prefix->is(tok::at)) || + (Tok->is(tok::at) && Prefix->TokenText == "$")) { + // This looks like $@"aaa" or @$"aaa" so we need to combine all 3 tokens. + Tok->ColumnWidth += Prefix->ColumnWidth; Tokens.erase(Tokens.end() - 2); - Tokens.erase(Tokens.end() - 1); - return true; + Prefix = Tok; } } // Convert back into just a string_literal. - At->Tok.setKind(tok::string_literal); - At->TokenText = StringRef(At->TokenText.begin(), - String->TokenText.end() - At->TokenText.begin()); - At->ColumnWidth += String->ColumnWidth; - At->setType(TT_CSharpStringLiteral); + Prefix->Tok.setKind(tok::string_literal); + Prefix->TokenText = + StringRef(Prefix->TokenText.begin(), + String->TokenText.end() - Prefix->TokenText.begin()); + Prefix->ColumnWidth += String->ColumnWidth; + Prefix->setType(TT_CSharpStringLiteral); Tokens.erase(Tokens.end() - 1); return true; } @@ -303,9 +371,11 @@ bool FormatTokenLexer::tryMergeNullishCoalescingEqual() { bool FormatTokenLexer::tryMergeCSharpKeywordVariables() { if (Tokens.size() < 2) return false; - auto &At = *(Tokens.end() - 2); - auto &Keyword = *(Tokens.end() - 1); - if (!At->is(tok::at)) + const auto At = *(Tokens.end() - 2); + if (At->isNot(tok::at)) + return false; + const auto Keyword = *(Tokens.end() - 1); + if (Keyword->TokenText == "$") return false; if (!Keywords.isCSharpKeyword(*Keyword)) return false; @@ -412,15 +482,28 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Kinds.size(); - if (!First[0]->is(Kinds[0])) + for (unsigned i = 0; i < Kinds.size(); ++i) + if (!First[i]->is(Kinds[i])) + return false; + + return tryMergeTokens(Kinds.size(), NewType); +} + +bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) { + if (Tokens.size() < Count) return false; + + SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Count; unsigned AddLength = 0; - for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore()) + for (size_t i = 1; i < Count; ++i) { + // If there is whitespace separating the token and the previous one, + // they should not be merged. + if (First[i]->hasWhitespaceBefore()) return false; AddLength += First[i]->TokenText.size(); } - Tokens.resize(Tokens.size() - Kinds.size() + 1); + + Tokens.resize(Tokens.size() - Count + 1); First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; @@ -428,6 +511,13 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, return true; } +bool FormatTokenLexer::tryMergeTokensAny( + ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) { + return llvm::any_of(Kinds, [this, NewType](ArrayRef<tok::TokenKind> Kinds) { + return tryMergeTokens(Kinds, NewType); + }); +} + // Returns \c true if \p Tok can only be followed by an operand in JavaScript. bool FormatTokenLexer::precedesOperand(FormatToken *Tok) { // NB: This is not entirely correct, as an r_paren can introduce an operand @@ -591,7 +681,7 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { bool Verbatim = false; bool Interpolated = false; - if (TokenText.startswith(R"($@")")) { + if (TokenText.startswith(R"($@")") || TokenText.startswith(R"(@$")")) { Verbatim = true; Interpolated = true; } else if (TokenText.startswith(R"(@")")) { @@ -670,6 +760,7 @@ void FormatTokenLexer::handleTemplateStrings() { for (; Offset != Lex->getBuffer().end(); ++Offset) { if (Offset[0] == '`') { StateStack.pop(); + ++Offset; break; } if (Offset[0] == '\\') { @@ -678,12 +769,12 @@ void FormatTokenLexer::handleTemplateStrings() { Offset[1] == '{') { // '${' introduces an expression interpolation in the template string. StateStack.push(LexerState::NORMAL); - ++Offset; + Offset += 2; break; } } - StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); + StringRef LiteralText(TmplBegin, Offset - TmplBegin); BacktickToken->setType(TT_TemplateString); BacktickToken->Tok.setKind(tok::string_literal); BacktickToken->TokenText = LiteralText; @@ -704,9 +795,7 @@ void FormatTokenLexer::handleTemplateStrings() { StartColumn, Style.TabWidth, Encoding); } - SourceLocation loc = Offset < Lex->getBuffer().end() - ? Lex->getSourceLocation(Offset + 1) - : SourceMgr.getLocForEndOfFile(ID); + SourceLocation loc = Lex->getSourceLocation(Offset); resetLexer(SourceMgr.getFileOffset(loc)); } @@ -935,7 +1024,7 @@ FormatToken *FormatTokenLexer::getNextToken() { // the same as a single LF. if (i + 1 < e && Text[i + 1] == '\n') break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case '\n': ++FormatTok->NewlinesBefore; if (!InEscape) @@ -1004,12 +1093,19 @@ FormatToken *FormatTokenLexer::getNextToken() { } if (Style.isVerilog()) { + static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase); + SmallVector<StringRef, 1> Matches; // Verilog uses the backtick instead of the hash for preprocessor stuff. // And it uses the hash for delays and parameter lists. In order to continue // using `tok::hash` in other places, the backtick gets marked as the hash // here. And in order to tell the backtick and hash apart for // Verilog-specific stuff, the hash becomes an identifier. - if (FormatTok->isOneOf(tok::hash, tok::hashhash)) { + if (FormatTok->is(tok::numeric_constant)) { + // In Verilog the quote is not part of a number. + auto Quote = FormatTok->TokenText.find('\''); + if (Quote != StringRef::npos) + truncateToken(Quote); + } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) { FormatTok->Tok.setKind(tok::raw_identifier); } else if (FormatTok->is(tok::raw_identifier)) { if (FormatTok->TokenText == "`") { @@ -1018,6 +1114,15 @@ FormatToken *FormatTokenLexer::getNextToken() { } else if (FormatTok->TokenText == "``") { FormatTok->Tok.setIdentifierInfo(nullptr); FormatTok->Tok.setKind(tok::hashhash); + } else if (Tokens.size() > 0 && + Tokens.back()->is(Keywords.kw_apostrophe) && + NumberBase.match(FormatTok->TokenText, &Matches)) { + // In Verilog in a based number literal like `'b10`, there may be + // whitespace between `'b` and `10`. Therefore we handle the base and + // the rest of the number literal as two tokens. But if there is no + // space in the input code, we need to manually separate the two parts. + truncateToken(Matches[0].size()); + FormatTok->setFinalizedType(TT_VerilogNumberBase); } } } @@ -1060,6 +1165,13 @@ FormatToken *FormatTokenLexer::getNextToken() { StateStack.push(LexerState::TOKEN_STASHED); } + if (Style.isVerilog() && Tokens.size() > 0 && + Tokens.back()->is(TT_VerilogNumberBase) && + FormatTok->Tok.isOneOf(tok::identifier, tok::question)) { + // Mark the number following a base like `'h?a0` as a number. + FormatTok->Tok.setKind(tok::numeric_constant); + } + // Now FormatTok is the next non-whitespace token. StringRef Text = FormatTok->TokenText; |