diff options
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 736 | ||||
-rw-r--r-- | lib/Format/BreakableToken.h | 362 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Format/Comments.cpp | 36 | ||||
-rw-r--r-- | lib/Format/Comments.h | 33 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 262 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.h | 19 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 147 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 22 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.cpp | 38 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.h | 1 | ||||
-rw-r--r-- | lib/Format/NamespaceEndCommentsFixer.cpp | 175 | ||||
-rw-r--r-- | lib/Format/NamespaceEndCommentsFixer.h | 37 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 223 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 4 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 46 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.h | 8 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 286 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 35 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 274 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 44 |
21 files changed, 2070 insertions, 720 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 6363f895f95b9..c97486e4e4a79 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "BreakableToken.h" -#include "Comments.h" +#include "ContinuationIndenter.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" @@ -40,6 +40,21 @@ static bool IsBlank(char C) { } } +static StringRef getLineCommentIndentPrefix(StringRef Comment) { + static const char *const KnownPrefixes[] = {"///", "//", "//!"}; + StringRef LongestPrefix; + for (StringRef KnownPrefix : KnownPrefixes) { + if (Comment.startswith(KnownPrefix)) { + size_t PrefixLength = KnownPrefix.size(); + while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') + ++PrefixLength; + if (PrefixLength > LongestPrefix.size()) + LongestPrefix = Comment.substr(0, PrefixLength); + } + } + return LongestPrefix; +} + static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, @@ -132,37 +147,61 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, return BreakableToken::Split(StringRef::npos, 0); } +bool switchesFormatting(const FormatToken &Token) { + assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && + "formatting regions are switched by comment tokens"); + StringRef Content = Token.TokenText.substr(2).ltrim(); + return Content.startswith("clang-format on") || + Content.startswith("clang-format off"); +} + +unsigned +BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const { + // Example: consider the content + // lala lala + // - RemainingTokenColumns is the original number of columns, 10; + // - Split is (4, 2), denoting the two spaces between the two words; + // + // We compute the number of columns when the split is 
compressed into a single + // space, like: + // lala lala + return RemainingTokenColumns + 1 - Split.second; +} + unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(Offset, Length), + encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), StartColumn + Prefix.size(), Style.TabWidth, Encoding); } BreakableSingleLineToken::BreakableSingleLineToken( - const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Tok, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { - assert(Tok.TokenText.endswith(Postfix)); + assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } BreakableStringLiteral::BreakableStringLiteral( - const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, - InPPDirective, Encoding, Style) {} + const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, + StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableSingleLineToken(Tok, 
StartColumn, Prefix, Postfix, InPPDirective, + Encoding, Style) {} BreakableToken::Split BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { return getStringSplit(Line.substr(TailOffset), StartColumn + Prefix.size() + Postfix.size(), ColumnLimit, Style.TabWidth, Encoding); @@ -171,86 +210,149 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - unsigned LeadingSpaces = StartColumn; - // The '@' of an ObjC string literal (@"Test") does not become part of the - // string token. - // FIXME: It might be a cleaner solution to merge the tokens as a - // precomputation step. - if (Prefix.startswith("@")) - --LeadingSpaces; Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, - Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); -} - -BreakableLineComment::BreakableLineComment( - const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableSingleLineToken(Token, IndentLevel, StartColumn, - getLineCommentIndentPrefix(Token.TokenText), "", - InPPDirective, Encoding, Style) { - OriginalPrefix = Prefix; - if (Token.TokenText.size() > Prefix.size() && - isAlphanumeric(Token.TokenText[Prefix.size()])) { - if (Prefix == "//") - Prefix = "// "; - else if (Prefix == "///") - Prefix = "/// "; - else if (Prefix == "//!") - Prefix = "//! 
"; - } + Prefix, InPPDirective, 1, StartColumn); } +BreakableComment::BreakableComment(const FormatToken &Token, + unsigned StartColumn, + bool InPPDirective, + encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Token, InPPDirective, Encoding, Style), + StartColumn(StartColumn) {} + +unsigned BreakableComment::getLineCount() const { return Lines.size(); } + BreakableToken::Split -BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), +BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + // Don't break lines matching the comment pragmas regex. + if (CommentPragmasRegex.match(Content[LineIndex])) + return Split(StringRef::npos, 0); + return getCommentSplit(Content[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), ColumnLimit, Style.TabWidth, Encoding); } -void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) { +void BreakableComment::compressWhitespace(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + // Text is relative to the content line, but Whitespaces operates relative to + // the start of the corresponding token, so compute the start of the Split + // that needs to be compressed into a single space relative to the start of + // its token. 
+ unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, - Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", + /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } -void BreakableLineComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", - "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); -} - -void BreakableLineComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (OriginalPrefix != Prefix) { - Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", - /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); +BreakableToken::Split +BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix, + unsigned PreviousEndColumn, + unsigned ColumnLimit) const { + unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); + StringRef TrimmedText = Text.rtrim(Blanks); + // This is the width of the resulting line in case the full line of Text gets + // reflown up starting at ReflowStartColumn. + unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( + TrimmedText, ReflowStartColumn, + Style.TabWidth, Encoding); + // If the full line fits up, we return a reflow split after it, + // otherwise we compute the largest piece of text that fits after + // ReflowStartColumn. + Split ReflowSplit = + FullWidth <= ColumnLimit + ? 
Split(TrimmedText.size(), Text.size() - TrimmedText.size()) + : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, + Style.TabWidth, Encoding); + + // We need to be extra careful here, because while it's OK to keep a long line + // if it can't be broken into smaller pieces (like when the first word of a + // long line is longer than the column limit), it's not OK to reflow that long + // word up. So we recompute the size of the previous line after reflowing and + // only return the reflow split if that's under the line limit. + if (ReflowSplit.first != StringRef::npos && + // Check if the width of the newly reflown line is under the limit. + PreviousEndColumn + ReflowPrefix.size() + + encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), + PreviousEndColumn + + ReflowPrefix.size(), + Style.TabWidth, Encoding) <= + ColumnLimit) { + return ReflowSplit; } + return Split(StringRef::npos, 0); +} + +const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { + return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; +} + +static bool mayReflowContent(StringRef Content) { + Content = Content.trim(Blanks); + // Lines starting with '@' commonly have special meaning. + static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX"}; + bool hasSpecialMeaningPrefix = false; + for (StringRef Prefix : kSpecialMeaningPrefixes) { + if (Content.startswith(Prefix)) { + hasSpecialMeaningPrefix = true; + break; + } + } + // Simple heuristic for what to reflow: content should contain at least two + // characters and either the first or second character must be + // non-punctuation. + return Content.size() >= 2 && !hasSpecialMeaningPrefix && + !Content.endswith("\\") && + // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is + // true, then the first code point must be 1 byte long. 
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); } BreakableBlockComment::BreakableBlockComment( - const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, + const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { - StringRef TokenText(Token.TokenText); + : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_BlockComment) && + "block comment section must start with a block comment"); + + StringRef TokenText(Tok.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); int IndentDelta = StartColumn - OriginalStartColumn; - LeadingWhitespace.resize(Lines.size()); - StartOfLineColumn.resize(Lines.size()); - StartOfLineColumn[0] = StartColumn + 2; + Content.resize(Lines.size()); + Content[0] = Lines[0]; + ContentColumn.resize(Lines.size()); + // Account for the initial '/*'. + ContentColumn[0] = StartColumn + 2; + Tokens.resize(Lines.size()); for (size_t i = 1; i < Lines.size(); ++i) adjustWhitespace(i, IndentDelta); + // Align decorations with the column of the star on the first line, + // that is one column after the start "/*". 
+ DecorationColumn = StartColumn + 1; + + // Account for comment decoration patterns like this: + // + // /* + // ** blah blah blah + // */ + if (Lines.size() >= 2 && Content[1].startswith("**") && + static_cast<unsigned>(ContentColumn[1]) == StartColumn) { + DecorationColumn = StartColumn; + } + Decoration = "* "; if (Lines.size() == 1 && !FirstInLine) { // Comments for which FirstInLine is false can start on arbitrary column, @@ -262,49 +364,60 @@ BreakableBlockComment::BreakableBlockComment( } for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { // If the last line is empty, the closing "*/" will have a star. - if (i + 1 == e && Lines[i].empty()) + if (i + 1 == e && Content[i].empty()) break; - if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) + if (!Content[i].empty() && i + 1 != e && + Decoration.startswith(Content[i])) continue; - while (!Lines[i].startswith(Decoration)) + while (!Content[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); } LastLineNeedsDecoration = true; - IndentAtLineBreak = StartOfLineColumn[0] + 1; - for (size_t i = 1; i < Lines.size(); ++i) { - if (Lines[i].empty()) { - if (i + 1 == Lines.size()) { + IndentAtLineBreak = ContentColumn[0] + 1; + for (size_t i = 1, e = Lines.size(); i < e; ++i) { + if (Content[i].empty()) { + if (i + 1 == e) { // Empty last line means that we already have a star as a part of the // trailing */. We also need to preserve whitespace, so that */ is // correctly indented. LastLineNeedsDecoration = false; + // Align the star in the last '*/' with the stars on the previous lines. + if (e >= 2 && !Decoration.empty()) { + ContentColumn[i] = DecorationColumn; + } } else if (Decoration.empty()) { // For all other lines, set the start column to 0 if they're empty, so // we do not insert trailing whitespace anywhere. - StartOfLineColumn[i] = 0; + ContentColumn[i] = 0; } continue; } // The first line already excludes the star. 
+ // The last line excludes the star if LastLineNeedsDecoration is false. // For all other lines, adjust the line to exclude the star and // (optionally) the first whitespace. - unsigned DecorationSize = - Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); - StartOfLineColumn[i] += DecorationSize; - Lines[i] = Lines[i].substr(DecorationSize); - LeadingWhitespace[i] += DecorationSize; - if (!Decoration.startswith(Lines[i])) + unsigned DecorationSize = Decoration.startswith(Content[i]) + ? Content[i].size() + : Decoration.size(); + if (DecorationSize) { + ContentColumn[i] = DecorationColumn + DecorationSize; + } + Content[i] = Content[i].substr(DecorationSize); + if (!Decoration.startswith(Content[i])) IndentAtLineBreak = - std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); + std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); } - IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + IndentAtLineBreak = + std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { - llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] - << "\n"; + llvm::dbgs() << i << " |" << Content[i] << "| " + << "CC=" << ContentColumn[i] << "| " + << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; } }); } @@ -334,78 +447,162 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. - Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); - Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); - // Adjust LeadingWhitespace to account all whitespace between the lines - // to the current line. 
- LeadingWhitespace[LineIndex] = - Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + size_t PreviousContentOffset = + Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); + Content[LineIndex - 1] = Lines[LineIndex - 1].substr( + PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); + Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); // Adjust the start column uniformly across all lines. - StartOfLineColumn[LineIndex] = + ContentColumn[LineIndex] = encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + IndentDelta; } -unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } - unsigned BreakableBlockComment::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); - return ContentStartColumn + - encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), - ContentStartColumn, Style.TabWidth, - Encoding) + - // The last line gets a "*/" postfix. - (LineIndex + 1 == Lines.size() ? 2 : 0); -} - -BreakableToken::Split -BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Lines[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Style.TabWidth, Encoding); + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + unsigned LineLength = + ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); + // The last line gets a "*/" postfix. + if (LineIndex + 1 == Lines.size()) { + LineLength += 2; + // We never need a decoration when breaking just the trailing "*/" postfix. 
+ // Note that checking that Length == 0 is not enough, since Length could + // also be StringRef::npos. + if (Content[LineIndex].substr(TailOffset, Length).empty()) { + LineLength -= Decoration.size(); + } + } + return LineLength; } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; + // We need this to account for the case when we have a decoration "* " for all + // the lines except for the last one, where the star in "*/" acts as a + // decoration. + unsigned LocalIndentAtLineBreak = IndentAtLineBreak; if (LineIndex + 1 == Lines.size() && Text.size() == Split.first + Split.second) { // For the last line we need to break before "*/", but not to add "* ". Prefix = ""; + if (LocalIndentAtLineBreak >= 2) + LocalIndentAtLineBreak -= 2; } - + // The split offset is from the beginning of the line. Convert it to an offset + // from the beginning of the token text. 
unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; - assert(IndentAtLineBreak >= Decoration.size()); + assert(LocalIndentAtLineBreak >= Prefix.size()); Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, - IndentLevel, IndentAtLineBreak - Decoration.size()); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix, + InPPDirective, /*Newlines=*/1, + /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); } -void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); - unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); +BreakableToken::Split BreakableBlockComment::getSplitBefore( + unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) + return Split(StringRef::npos, 0); + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, + ColumnLimit); +} + +unsigned BreakableBlockComment::getReflownColumn( + StringRef Content, + unsigned LineIndex, + unsigned PreviousEndColumn) const { + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + // If this is the last line, it will carry around its '*/' postfix. + unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0); + // The line is composed of previous text, reflow prefix, reflown text and + // postfix. 
+ unsigned ReflownColumn = + StartColumn + encoding::columnWidthWithTabs(Content, StartColumn, + Style.TabWidth, Encoding) + + PostfixLength; + return ReflownColumn; } +unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + // Block comment line contents contain the trailing whitespace after the + // decoration, so the need of left trim. Note that this behavior is + // consistent with the breaking of block comments where the indentation of + // a broken line is uniform across all the lines of the block comment. + SplitBefore.first + SplitBefore.second < + Content[LineIndex].ltrim().size()) { + // A piece of line, not the whole, gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown, need to check if we need to insert a break + // for the postfix or not. + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn <= ColumnLimit) { + return ReflownColumn; + } + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } +} void BreakableBlockComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (LineIndex == 0) + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + if (LineIndex == 0) return; + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + if (SplitBefore.first != StringRef::npos) { + // Here we need to reflow. + assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && + "Reflowing whitespace within a token"); + // This is the offset of the end of the last line relative to the start of + // the token text in the token. 
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = TrimmedContent.data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), WhitespaceOffsetInToken, + /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, + /*Spaces=*/0); + // Check if we need to also insert a break at the whitespace range. + // For this we first adapt the reflow split relative to the beginning of the + // content. + // Note that we don't need a penalty for this break, since it doesn't change + // the total number of lines. + Split BreakSplit = SplitBefore; + BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data(); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn > ColumnLimit) { + insertBreak(LineIndex, 0, BreakSplit, Whitespaces); + } return; + } + + // Here no reflow with the previous line will happen. + // Fix the decoration of the line at LineIndex. StringRef Prefix = Decoration; - if (Lines[LineIndex].empty()) { + if (Content[LineIndex].empty()) { if (LineIndex + 1 == Lines.size()) { if (!LastLineNeedsDecoration) { // If the last line was empty, we don't need a prefix, as the */ will @@ -418,19 +615,35 @@ void BreakableBlockComment::replaceWhitespaceBefore( Prefix = Prefix.substr(0, 1); } } else { - if (StartOfLineColumn[LineIndex] == 1) { + if (ContentColumn[LineIndex] == 1) { // This line starts immediately after the decorating *. Prefix = Prefix.substr(0, 1); } } - - unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - - Tok.TokenText.data() - - LeadingWhitespace[LineIndex]; + // This is the offset of the end of the last line relative to the start of the + // token text in the token. 
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = Content[LineIndex].data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; Whitespaces.replaceWhitespaceInToken( - Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, - InPPDirective, 1, IndentLevel, - StartOfLineColumn[LineIndex] - Prefix.size()); + tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, + InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); +} + +bool BreakableBlockComment::mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const { + // Content[LineIndex] may exclude the indent after the '*' decoration. In that + // case, we compute the start of the comment pragma manually. + StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { + IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)); } unsigned @@ -439,7 +652,248 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex, // If we break, we always break at the predefined indent. 
if (TailOffset != 0) return IndentAtLineBreak; - return std::max(0, StartOfLineColumn[LineIndex]); + return std::max(0, ContentColumn[LineIndex]); +} + +BreakableLineCommentSection::BreakableLineCommentSection( + const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_LineComment) && + "line comment section must start with a line comment"); + FormatToken *LineTok = nullptr; + for (const FormatToken *CurrentTok = &Tok; + CurrentTok && CurrentTok->is(TT_LineComment); + CurrentTok = CurrentTok->Next) { + LastLineTok = LineTok; + StringRef TokenText(CurrentTok->TokenText); + assert(TokenText.startswith("//")); + size_t FirstLineIndex = Lines.size(); + TokenText.split(Lines, "\n"); + Content.resize(Lines.size()); + ContentColumn.resize(Lines.size()); + OriginalContentColumn.resize(Lines.size()); + Tokens.resize(Lines.size()); + Prefix.resize(Lines.size()); + OriginalPrefix.resize(Lines.size()); + for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { + // We need to trim the blanks in case this is not the first line in a + // multiline comment. Then the indent is included in Lines[i]. + StringRef IndentPrefix = + getLineCommentIndentPrefix(Lines[i].ltrim(Blanks)); + assert(IndentPrefix.startswith("//")); + OriginalPrefix[i] = Prefix[i] = IndentPrefix; + if (Lines[i].size() > Prefix[i].size() && + isAlphanumeric(Lines[i][Prefix[i].size()])) { + if (Prefix[i] == "//") + Prefix[i] = "// "; + else if (Prefix[i] == "///") + Prefix[i] = "/// "; + else if (Prefix[i] == "//!") + Prefix[i] = "//! 
"; + } + + Tokens[i] = LineTok; + Content[i] = Lines[i].substr(IndentPrefix.size()); + OriginalContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(OriginalPrefix[i], + StartColumn, + Style.TabWidth, + Encoding); + ContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(Prefix[i], + StartColumn, + Style.TabWidth, + Encoding); + + // Calculate the end of the non-whitespace text in this line. + size_t EndOfLine = Content[i].find_last_not_of(Blanks); + if (EndOfLine == StringRef::npos) + EndOfLine = Content[i].size(); + else + ++EndOfLine; + Content[i] = Content[i].substr(0, EndOfLine); + } + LineTok = CurrentTok->Next; + if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { + // A line comment section needs to broken by a line comment that is + // preceded by at least two newlines. Note that we put this break here + // instead of breaking at a previous stage during parsing, since that + // would split the contents of the enum into two unwrapped lines in this + // example, which is undesirable: + // enum A { + // a, // comment about a + // + // // comment about b + // b + // }; + // + // FIXME: Consider putting separate line comment sections as children to + // the unwrapped line instead. + break; + } + } +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplit( + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = + (TailOffset == 0 ? ContentColumn[LineIndex] + : OriginalContentColumn[LineIndex]); + return ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); +} + +void BreakableLineCommentSection::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + // Compute the offset of the split relative to the beginning of the token + // text. 
+ unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + // Compute the size of the new indent, including the size of the new prefix of + // the newly broken line. + unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] + + Prefix[LineIndex].size() - + OriginalPrefix[LineIndex].size(); + assert(IndentAtLineBreak >= Prefix[LineIndex].size()); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", + Prefix[LineIndex], InPPDirective, /*Newlines=*/1, + /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); +} + +BreakableComment::Split BreakableLineCommentSection::getSplitBefore( + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) + return Split(StringRef::npos, 0); + return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, + ColumnLimit); +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + // A piece of line, not the whole line, gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown. 
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex], + StartColumn, + Style.TabWidth, + Encoding); + } +} + +void BreakableLineCommentSection::replaceWhitespaceBefore( + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + // If this is the first line of a token, we need to inform Whitespace Manager + // about it: either adapt the whitespace range preceding it, or mark it as an + // untouchable token. + // This happens for instance here: + // // line 1 \ + // // line 2 + if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { + if (SplitBefore.first != StringRef::npos) { + // Reflow happens between tokens. Replace the whitespace between the + // tokens by the empty string. + Whitespaces.replaceWhitespace( + *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + // Replace the indent and prefix of the token with the reflow prefix. + unsigned WhitespaceLength = + Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + /*Offset=*/0, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, + /*InPPDirective=*/false, + /*Newlines=*/0, + /*Spaces=*/0); + } else { + // This is the first line for the current token, but no reflow with the + // previous token is necessary. However, we still may need to adjust the + // start column. Note that ContentColumn[LineIndex] is the expected + // content column after a possible update to the prefix, hence the prefix + // length change is included. 
+ unsigned LineColumn = + ContentColumn[LineIndex] - + (Content[LineIndex].data() - Lines[LineIndex].data()) + + (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); + + // We always want to create a replacement instead of adding an untouchable + // token, even if LineColumn is the same as the original column of the + // token. This is because WhitespaceManager doesn't align trailing + // comments if they are untouchable. + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/1, + /*Spaces=*/LineColumn, + /*StartOfTokenColumn=*/LineColumn, + /*InPPDirective=*/false); + } + } + if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { + // Adjust the prefix if necessary. + + // Take care of the space possibly introduced after a decoration. + assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() && + "Expecting a line comment prefix to differ from original by at most " + "a space"); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", + /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); + } + // Add a break after a reflow split has been introduced, if necessary. + // Note that this break doesn't need to be penalized, since it doesn't change + // the number of lines. + if (SplitBefore.first != StringRef::npos && + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + insertBreak(LineIndex, 0, SplitBefore, Whitespaces); + } +} + +void BreakableLineCommentSection::updateNextToken(LineState& State) const { + if (LastLineTok) { + State.NextToken = LastLineTok->Next; + } +} + +bool BreakableLineCommentSection::mayReflow( + unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { + // Line comments have the indent as part of the prefix, so we need to + // recompute the start of the line. 
+ StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].startswith("//")) { + IndentContent = Lines[LineIndex].substr(2); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)) && + OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; +} + +unsigned +BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + if (TailOffset != 0) { + return OriginalContentColumn[LineIndex]; + } + return ContentColumn[LineIndex]; } } // namespace format diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index eb1f9fda30711..e642a538e21c3 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -8,9 +8,10 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Declares BreakableToken, BreakableStringLiteral, and -/// BreakableBlockComment classes, that contain token type-specific logic to -/// break long lines in tokens. +/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment, +/// BreakableBlockComment and BreakableLineCommentSection classes, that contain +/// token type-specific logic to break long lines in tokens and reflow content +/// between tokens. /// //===----------------------------------------------------------------------===// @@ -20,15 +21,49 @@ #include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" +#include "llvm/Support/Regex.h" #include <utility> namespace clang { namespace format { +/// \brief Checks if \p Token switches formatting, like /* clang-format off */. +/// \p Token must be a comment. +bool switchesFormatting(const FormatToken &Token); + struct FormatStyle; /// \brief Base class for strategies on how to break tokens. 
/// +/// This is organised around the concept of a \c Split, which is a whitespace +/// range that signifies a position of the content of a token where a +/// reformatting might be done. Operating with splits is divided into 3 +/// operations: +/// - getSplit, for finding a split starting at a position, +/// - getLineLengthAfterSplit, for calculating the size in columns of the rest +/// of the content after a split has been used for breaking, and +/// - insertBreak, for executing the split using a whitespace manager. +/// +/// There is a pair of operations that are used to compress a long whitespace +/// range into a single space if that will bring the line length under the +/// column limit: +/// - getLineLengthAfterCompression, for calculating the size in columns of the +/// line after a whitespace range has been compressed, and +/// - compressWhitespace, for executing the whitespace compression using a +/// whitespace manager; note that the compressed whitespace may be in the +/// middle of the original line and of the reformatted line. +/// +/// For tokens where the whitespace before each line needs to be also +/// reformatted, for example for tokens supporting reflow, there are analogous +/// operations that might be executed before the main line breaking occurs: +/// - getSplitBefore, for finding a split such that the content preceding it +/// needs to be specially reflown, +/// - getLineLengthAfterSplitBefore, for calculating the line length in columns +/// of the remainder of the content after the beginning of the content has +/// been reformatted, and +/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// manager. +/// /// FIXME: The interface seems set in stone, so we might want to just pull the /// strategy into the class, instead of controlling it from the outside. 
class BreakableToken { @@ -42,44 +77,85 @@ public: virtual unsigned getLineCount() const = 0; /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p Offset with length \p Length. + /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. /// - /// Note that previous breaks are not taken into account. \p Offset is always - /// specified from the start of the (original) line. + /// Note that previous breaks are not taken into account. \p TailOffset is + /// always specified from the start of the (original) line. /// \p Length can be set to StringRef::npos, which means "to the end of line". virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, + getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not /// violate \p ColumnLimit. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const = 0; + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) = 0; + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range + /// \p Split has been compressed into a single space. + unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const; + /// \brief Replaces the whitespace range described by \p Split with a single /// space. 
- virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) = 0; + virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, + Split Split, + WhitespaceManager &Whitespaces) = 0; + + /// \brief Returns a whitespace range (offset, length) of the content at + /// \p LineIndex such that the content preceding this range needs to be + /// reformatted before any breaks are made to this line. + /// + /// \p PreviousEndColumn is the end column of the previous line after + /// formatting. + /// + /// A result having offset == StringRef::npos means that no piece of the line + /// needs to be reformatted before any breaks are made. + virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + return Split(StringRef::npos, 0); + } + + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex after the content preceding the whitespace range specified + /// by \p SplitBefore has been reformatted, but before any breaks are made to + /// this line. + virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + /// Performs a reformatting of the content at \p LineIndex preceding the + /// whitespace range \p SplitBefore. virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) {} + /// \brief Updates the next token of \p State to the next token after this + /// one. This can be used when this token manages a set of underlying tokens + /// as a unit and is responsible for the formatting of them. 
+ virtual void updateNextToken(LineState &State) const {} + protected: - BreakableToken(const FormatToken &Tok, unsigned IndentLevel, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style) - : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective), - Encoding(Encoding), Style(Style) {} + BreakableToken(const FormatToken &Tok, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), + Style(Style) {} const FormatToken &Tok; - const unsigned IndentLevel; const bool InPPDirective; const encoding::Encoding Encoding; const FormatStyle &Style; @@ -95,10 +171,9 @@ public: StringRef::size_type Length) const override; protected: - BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, - unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, + BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, + bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); // The column in which the token starts. @@ -117,107 +192,139 @@ public: /// /// \p StartColumn specifies the column in which the token will start /// after formatting. 
- BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, - unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, + StringRef Prefix, StringRef Postfix, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override {} + void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override {} }; -class BreakableLineComment : public BreakableSingleLineToken { -public: - /// \brief Creates a breakable token for a line comment. +class BreakableComment : public BreakableToken { +protected: + /// \brief Creates a breakable token for a comment. /// - /// \p StartColumn specifies the column in which the comment will start - /// after formatting. - BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + /// \p StartColumn specifies the column in which the comment will start after + /// formatting. 
+ BreakableComment(const FormatToken &Token, unsigned StartColumn, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, - WhitespaceManager &Whitespaces) override; +public: + unsigned getLineCount() const override; + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; -private: - // The prefix without an additional space if one was added. - StringRef OriginalPrefix; +protected: + virtual unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const = 0; + + // Returns a split that divides Text into left and right parts, such that + // the left part is suitable for reflowing after PreviousEndColumn. + Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, + unsigned PreviousEndColumn, unsigned ColumnLimit) const; + + // Returns the token containing the line at LineIndex. + const FormatToken &tokenAt(unsigned LineIndex) const; + + // Checks if the content of line LineIndex may be reflown with the previous + // line. + virtual bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const = 0; + + // Contains the original text of the lines of the block comment. + // + // In case of block comments, excludes the leading /* in the first line and + // trailing */ in the last line. In case of line comments, excludes the + // leading // and spaces. 
+ SmallVector<StringRef, 16> Lines; + + // Contains the text of the lines excluding all leading and trailing + // whitespace between the lines. Note that the decoration (if present) is also + // not considered part of the text. + SmallVector<StringRef, 16> Content; + + // Tokens[i] contains a reference to the token containing Lines[i] if the + // whitespace range before that token is managed by this block. + // Otherwise, Tokens[i] is a null pointer. + SmallVector<FormatToken *, 16> Tokens; + + // ContentColumn[i] is the target column at which Content[i] should be. + // Note that this excludes a leading "* " or "*" in case of block comments + // where all lines have a "*" prefix, or the leading "// " or "//" in case of + // line comments. + // + // In block comments, the first line's target column is always positive. The + // remaining lines' target columns are relative to the first line to allow + // correct indentation of comments in \c WhitespaceManager. Thus they can be + // negative as well (in case the first line needs to be unindented more than + // there's actual whitespace in another line). + SmallVector<int, 16> ContentColumn; + + // The intended start column of the first line of text from this section. + unsigned StartColumn; + + // The prefix to use in front of a line that has been reflown up. + // For example, when reflowing the second line after the first here: + // // comment 1 + // // comment 2 + // we expect: + // // comment 1 comment 2 + // and not: + // // comment 1comment 2 + StringRef ReflowPrefix = " "; }; -class BreakableBlockComment : public BreakableToken { +class BreakableBlockComment : public BreakableComment { public: - /// \brief Creates a breakable token for a block comment. - /// - /// \p StartColumn specifies the column in which the comment will start - /// after formatting, while \p OriginalStartColumn specifies in which - /// column the comment started before formatting. 
- /// If the comment starts a line after formatting, set \p FirstInLine to true. - BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, unsigned OriginaStartColumn, - bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); - unsigned getLineCount() const override; unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const override; - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; private: - // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], - // so that all whitespace between the lines is accounted to Lines[LineIndex] - // as leading whitespace: - // - Lines[LineIndex] points to the text after that whitespace - // - Lines[LineIndex-1] shrinks by its trailing whitespace - // - LeadingWhitespace[LineIndex] is updated 
with the complete whitespace - // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. // - // Sets StartOfLineColumn to the intended column in which the text at + // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off + // leading and trailing whitespace. + // + // Sets ContentColumn to the intended column in which the text at // Lines[LineIndex] starts (note that the decoration, if present, is not // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Returns the column at which the text in line LineIndex starts, when broken - // at TailOffset. Note that the decoration (if present) is not considered part - // of the text. - unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; - - // Contains the text of the lines of the block comment, excluding the leading - // /* in the first line and trailing */ in the last line, and excluding all - // trailing whitespace between the lines. Note that the decoration (if - // present) is also not considered part of the text. - SmallVector<StringRef, 16> Lines; + // Computes the end column if the full Content from LineIndex gets reflown + // after PreviousEndColumn. + unsigned getReflownColumn(StringRef Content, unsigned LineIndex, + unsigned PreviousEndColumn) const; - // LeadingWhitespace[i] is the number of characters regarded as whitespace in - // front of Lines[i]. Note that this can include "* " sequences, which we - // regard as whitespace when all lines have a "*" prefix. - SmallVector<unsigned, 16> LeadingWhitespace; - - // StartOfLineColumn[i] is the target column at which Line[i] should be. - // Note that this excludes a leading "* " or "*" in case all lines have - // a "*" prefix. - // The first line's target column is always positive. 
The remaining lines' - // target columns are relative to the first line to allow correct indentation - // of comments in \c WhitespaceManager. Thus they can be negative as well (in - // case the first line needs to be unindented more than there's actual - // whitespace in another line). - SmallVector<int, 16> StartOfLineColumn; + unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. @@ -237,8 +344,69 @@ private: // Either "* " if all lines begin with a "*", or empty. StringRef Decoration; + + // If this block comment has decorations, this is the column of the start of + // the decorations. + unsigned DecorationColumn; }; +class BreakableLineCommentSection : public BreakableComment { +public: + BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); + + unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, + WhitespaceManager &Whitespaces) override; + void updateNextToken(LineState &State) const override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; + +private: + unsigned 
getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; + + // OriginalPrefix[i] contains the original prefix of line i, including + // trailing whitespace before the start of the content. The indentation + // preceding the prefix is not included. + // For example, if the line is: + // // content + // then the original prefix is "// ". + SmallVector<StringRef, 16> OriginalPrefix; + + // Prefix[i] contains the intended leading "//" with trailing spaces to + // account for the indentation of content within the comment at line i after + // formatting. It can be different than the original prefix when the original + // line starts like this: + // //content + // Then the original prefix is "//", but the prefix is "// ". + SmallVector<StringRef, 16> Prefix; + + SmallVector<unsigned, 16> OriginalContentColumn; + + /// \brief The token to which the last line of this breakable token belongs + /// to; nullptr if that token is the initial token. + /// + /// The distinction is because if the token of the last line of this breakable + /// token is distinct from the initial token, this breakable token owns the + /// whitespace before the token of the last line, and the whitespace manager + /// must be able to modify it. 
+ FormatToken *LastLineTok = nullptr; +}; } // namespace format } // namespace clang diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index c977c2d3c5fa1..0c7511c1bb07e 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -3,11 +3,11 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat AffectedRangeManager.cpp BreakableToken.cpp - Comments.cpp ContinuationIndenter.cpp Format.cpp FormatToken.cpp FormatTokenLexer.cpp + NamespaceEndCommentsFixer.cpp SortJavaScriptImports.cpp TokenAnalyzer.cpp TokenAnnotator.cpp diff --git a/lib/Format/Comments.cpp b/lib/Format/Comments.cpp deleted file mode 100644 index 1b27f5b30a603..0000000000000 --- a/lib/Format/Comments.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===--- Comments.cpp - Comment Manipulation -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Implements comment manipulation. 
-/// -//===----------------------------------------------------------------------===// - -#include "Comments.h" - -namespace clang { -namespace format { - -StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = {"///", "//", "//!"}; - StringRef LongestPrefix; - for (StringRef KnownPrefix : KnownPrefixes) { - if (Comment.startswith(KnownPrefix)) { - size_t PrefixLength = KnownPrefix.size(); - while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') - ++PrefixLength; - if (PrefixLength > LongestPrefix.size()) - LongestPrefix = Comment.substr(0, PrefixLength); - } - } - return LongestPrefix; -} - -} // namespace format -} // namespace clang diff --git a/lib/Format/Comments.h b/lib/Format/Comments.h deleted file mode 100644 index 59f0596361a5c..0000000000000 --- a/lib/Format/Comments.h +++ /dev/null @@ -1,33 +0,0 @@ -//===--- Comments.cpp - Comment manipulation -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Declares comment manipulation functionality. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H -#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/StringRef.h" - -namespace clang { -namespace format { - -/// \brief Returns the comment prefix of the line comment \p Comment. -/// -/// The comment prefix consists of a leading known prefix, like "//" or "///", -/// together with the following whitespace. 
-StringRef getLineCommentIndentPrefix(StringRef Comment); - -} // namespace format -} // namespace clang - -#endif diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 6bb6fb3060352..73ae10a29f8fb 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -20,7 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/Support/Debug.h" -#define DEBUG_TYPE "format-formatter" +#define DEBUG_TYPE "format-indenter" namespace clang { namespace format { @@ -57,8 +57,10 @@ static bool startsNextParameter(const FormatToken &Current, Style.BreakConstructorInitializersBeforeComma) return true; return Previous.is(tok::comma) && !Current.isTrailingComment() && - (Previous.isNot(TT_CtorInitializerComma) || - !Style.BreakConstructorInitializersBeforeComma); + ((Previous.isNot(TT_CtorInitializerComma) || + !Style.BreakConstructorInitializersBeforeComma) && + (Previous.isNot(TT_InheritanceComma) || + !Style.BreakBeforeInheritanceComma)); } ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, @@ -80,7 +82,7 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, State.Column = FirstIndent; State.Line = Line; State.NextToken = Line->First; - State.Stack.push_back(ParenState(FirstIndent, Line->Level, FirstIndent, + State.Stack.push_back(ParenState(FirstIndent, FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; @@ -135,6 +137,12 @@ bool ContinuationIndenter::canBreak(const LineState &State) { return false; } + // If binary operators are moved to the next line (including commas for some + // styles of constructor initializers), that's always ok. 
+ if (!Current.isOneOf(TT_BinaryOperator, tok::comma) && + State.Stack.back().NoLineBreakInOperand) + return false; + return !State.Stack.back().NoLineBreak; } @@ -150,7 +158,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && - Style.Language == FormatStyle::LK_Cpp && + Style.isCpp() && // FIXME: This is a temporary workaround for the case where clang-format // sets BreakBeforeParameter to avoid bin packing and this creates a // completely unnecessary line break after a template type that isn't @@ -191,6 +199,18 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Current.NestingLevel < State.StartOfLineLevel)) return true; + if (startsSegmentOfBuilderTypeCall(Current) && + (State.Stack.back().CallContinuation != 0 || + State.Stack.back().BreakBeforeParameter) && + // JavaScript is treated different here as there is a frequent pattern: + // SomeFunction(function() { + // ... + // }.bind(...)); + // FIXME: We should find a more generic solution to this problem. + !(State.Column <= NewLineColumn && + Style.Language == FormatStyle::LK_JavaScript)) + return true; + if (State.Column <= NewLineColumn) return false; @@ -255,11 +275,6 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { !Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter) return true; - if (startsSegmentOfBuilderTypeCall(Current) && - (State.Stack.back().CallContinuation != 0 || - State.Stack.back().BreakBeforeParameter)) - return true; - // The following could be precomputed as they do not depend on the state. // However, as they should take effect only if the UnwrappedLine does not fit // into the ColumnLimit, they are checked here in the ContinuationIndenter. 
@@ -334,8 +349,13 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; if (!DryRun) - Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0, - Spaces, State.Column + Spaces); + Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, + State.Column + Spaces); + + // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance + // declaration unless there is multiple inheritance. + if (Style.BreakBeforeInheritanceComma && Current.is(TT_InheritanceColon)) + State.Stack.back().NoLineBreak = true; if (Current.is(TT_SelectorName) && !State.Stack.back().ObjCSelectorNameFound) { @@ -370,6 +390,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Current.FakeLParens.size() > 0 && Current.FakeLParens.back() > prec::Unknown) State.Stack.back().NoLineBreak = true; + if (Previous.is(TT_TemplateString) && Previous.opensScope()) + State.Stack.back().NoLineBreak = true; if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && @@ -385,7 +407,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().NoLineBreak = true; if (Current.isMemberAccess() && Previous.is(tok::r_paren) && (Previous.MatchingParen && - (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) { + (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) // If there is a function call with long parameters, break before trailing // calls. This prevents things like: // EXPECT_CALL(SomeLongParameter).Times( @@ -393,6 +415,31 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // We don't want to do this for short parameters as they can just be // indexes. 
State.Stack.back().NoLineBreak = true; + + // Don't allow the RHS of an operator to be split over multiple lines unless + // there is a line-break right after the operator. + // Exclude relational operators, as there, it is always more desirable to + // have the LHS 'left' of the RHS. + const FormatToken *P = Current.getPreviousNonComment(); + if (!Current.is(tok::comment) && P && + (P->isOneOf(TT_BinaryOperator, tok::comma) || + (P->is(TT_ConditionalExpr) && P->is(tok::colon))) && + !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) && + P->getPrecedence() != prec::Assignment && + P->getPrecedence() != prec::Relational) { + bool BreakBeforeOperator = + P->MustBreakBefore || P->is(tok::lessless) || + (P->is(TT_BinaryOperator) && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || + (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators); + // Don't do this if there are only two operands. In these cases, there is + // always a nice vertical separation between them and the extra line break + // does not help. 
+ bool HasTwoOperands = + P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr); + if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) || + (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) + State.Stack.back().NoLineBreakInOperand = true; } State.Column += Spaces; @@ -540,9 +587,8 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!DryRun) { unsigned Newlines = std::max( 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); - Whitespaces.replaceWhitespace(Current, Newlines, - State.Stack.back().IndentLevel, State.Column, - State.Column, State.Line->InPPDirective); + Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, + State.Line->InPPDirective); } if (!Current.isTrailingComment()) @@ -559,9 +605,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. bool NestedBlockSpecialCase = - Style.Language != FormatStyle::LK_Cpp && - Style.Language != FormatStyle::LK_ObjC && - Current.is(tok::r_brace) && State.Stack.size() > 1 && + !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].NestedBlockInlined; if (!NestedBlockSpecialCase) for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) @@ -580,7 +624,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // If we break after { or the [ of an array initializer, we should also break // before the corresponding } or ]. 
if (PreviousNonComment && - (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))) + (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + (PreviousNonComment->is(TT_TemplateString) && + PreviousNonComment->opensScope()))) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { @@ -628,14 +674,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack[State.Stack.size() - 2].LastSpace; return State.FirstIndent; } + if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) + return State.Stack[State.Stack.size() - 2].LastSpace; if (Current.is(tok::identifier) && Current.Next && Current.Next->is(TT_DictLiteral)) return State.Stack.back().Indent; - if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) - return State.StartOfStringLiteral; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral - 1; + if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) + return State.StartOfStringLiteral; if (NextNonComment->is(tok::lessless) && State.Stack.back().FirstLessLess != 0) return State.Stack.back().FirstLessLess; @@ -696,10 +744,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (PreviousNonComment && PreviousNonComment->is(tok::colon) && PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) return ContinuationIndent; - if (NextNonComment->is(TT_CtorInitializerColon)) - return State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (NextNonComment->is(TT_CtorInitializerComma)) return State.Stack.back().Indent; + if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, + TT_InheritanceComma)) + return State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && !Current.isOneOf(tok::colon, tok::comment)) return 
ContinuationIndent; @@ -716,6 +765,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, assert(State.Stack.size()); const FormatToken &Current = *State.NextToken; + if (Current.isOneOf(tok::comma, TT_BinaryOperator)) + State.Stack.back().NoLineBreakInOperand = false; if (Current.is(TT_InheritanceColon)) State.Stack.back().AvoidBinPacking = true; if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) { @@ -724,8 +775,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, else State.Stack.back().LastOperatorWrapped = Newline; } - if ((Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) || - Current.is(TT_ConditionalExpr)) + if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) + State.Stack.back().LastOperatorWrapped = Newline; + if (Current.is(TT_ConditionalExpr) && Current.Previous && + !Current.Previous->is(TT_ConditionalExpr)) State.Stack.back().LastOperatorWrapped = Newline; if (Current.is(TT_ArraySubscriptLSquare) && State.Stack.back().StartOfArraySubscripts == 0) @@ -765,9 +818,14 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; } + if (Current.is(TT_InheritanceColon)) + State.Stack.back().Indent = + State.FirstIndent + Style.ContinuationIndentWidth; if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) State.Stack.back().NestedBlockIndent = State.Column + Current.ColumnWidth + 1; + if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow)) + State.Stack.back().LastSpace = State.Column; // Insert scopes created by fake parenthesis. 
const FormatToken *Previous = Current.getPreviousNonComment(); @@ -795,21 +853,30 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, } moveStatePastFakeLParens(State, Newline); - moveStatePastScopeOpener(State, Newline); moveStatePastScopeCloser(State); + if (Current.is(TT_TemplateString) && Current.opensScope()) + State.Stack.back().LastSpace = + (Current.IsMultiline ? Current.LastLineColumnWidth + : State.Column + Current.ColumnWidth) - + strlen("${"); + bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak && + !State.Stack.back().NoLineBreakInOperand; + moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); - if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) - State.StartOfStringLiteral = State.Column; if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; + else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column; else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && !Current.isStringLiteral()) State.StartOfStringLiteral = 0; State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; - unsigned Penalty = breakProtrudingToken(Current, State, DryRun); + unsigned Penalty = 0; + if (CanBreakProtrudingToken) + Penalty = breakProtrudingToken(Current, State, DryRun); if (State.Column > getColumnLimit(State)) { unsigned ExcessCharacters = State.Column - getColumnLimit(State); Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; @@ -848,6 +915,9 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, I != E; ++I) { ParenState NewParenState = State.Stack.back(); NewParenState.ContainsLineBreak = false; + NewParenState.LastOperatorWrapped = true; + NewParenState.NoLineBreak = + NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand; // Indent from 'LastSpace' unless these are fake parentheses encapsulating // a 
builder type call after 'return' or, if the alignment after opening @@ -862,24 +932,6 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); - // Don't allow the RHS of an operator to be split over multiple lines unless - // there is a line-break right after the operator. - // Exclude relational operators, as there, it is always more desirable to - // have the LHS 'left' of the RHS. - if (Previous && Previous->getPrecedence() != prec::Assignment && - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, tok::comma) && - Previous->getPrecedence() != prec::Relational) { - bool BreakBeforeOperator = - Previous->is(tok::lessless) || - (Previous->is(TT_BinaryOperator) && - Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || - (Previous->is(TT_ConditionalExpr) && - Style.BreakBeforeTernaryOperators); - if ((!Newline && !BreakBeforeOperator) || - (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) - NewParenState.NoLineBreak = true; - } - // Do not indent relative to the fake parentheses inserted for "." or "->". 
// This is a special case to make the following to statements consistent: // OuterFunction(InnerFunctionCall( // break @@ -931,7 +983,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, } unsigned NewIndent; - unsigned NewIndentLevel = State.Stack.back().IndentLevel; unsigned LastSpace = State.Stack.back().LastSpace; bool AvoidBinPacking; bool BreakBeforeParameter = false; @@ -941,7 +992,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, if (Current.opensBlockOrBlockTypeList(Style)) { NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth; NewIndent = std::min(State.Column + 2, NewIndent); - ++NewIndentLevel; } else { NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; } @@ -966,12 +1016,23 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // int> v); // FIXME: We likely want to do this for more combinations of brackets. // Verify that it is wanted for ObjC, too. - if (Current.Tok.getKind() == tok::less && - Current.ParentBracket == tok::l_paren) { + if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) { NewIndent = std::max(NewIndent, State.Stack.back().Indent); LastSpace = std::max(LastSpace, State.Stack.back().Indent); } + // JavaScript template strings are special as we always want to indent + // nested expressions relative to the ${}. Otherwise, this can create quite + // a mess. + if (Current.is(TT_TemplateString)) { + unsigned Column = Current.IsMultiline + ? Current.LastLineColumnWidth + : State.Column + Current.ColumnWidth; + NewIndent = Column; + LastSpace = Column; + NestedBlockIndent = Column; + } + AvoidBinPacking = (State.Line->MustBeDeclaration && !Style.BinPackParameters) || (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || @@ -1003,17 +1064,15 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // Generally inherit NoLineBreak from the current scope to nested scope. 
// However, don't do this for non-empty nested blocks, dict literals and // array literals as these follow different indentation rules. - const FormatToken *Previous = Current.getPreviousNonComment(); bool NoLineBreak = Current.Children.empty() && !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) && (State.Stack.back().NoLineBreak || + State.Stack.back().NoLineBreakInOperand || (Current.is(TT_TemplateOpener) && - State.Stack.back().ContainsUnwrappedBuilder) || - (Current.is(tok::l_brace) && !Newline && Previous && - Previous->is(tok::comma))); - State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace, - AvoidBinPacking, NoLineBreak)); + State.Stack.back().ContainsUnwrappedBuilder)); + State.Stack.push_back( + ParenState(NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; @@ -1027,7 +1086,7 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { // If we encounter a closing ), ], } or >, we can remove a level from our // stacks. if (State.Stack.size() > 1 && - (Current.isOneOf(tok::r_paren, tok::r_square) || + (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || State.NextToken->is(TT_TemplateCloser))) State.Stack.pop_back(); @@ -1047,10 +1106,9 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace) ? 
Style.ObjCBlockIndentWidth : Style.IndentWidth); - State.Stack.push_back(ParenState( - NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1, - State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, - /*NoLineBreak=*/false)); + State.Stack.push_back(ParenState(NewIndent, State.Stack.back().LastSpace, + /*AvoidBinPacking=*/true, + /*NoLineBreak=*/false)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = true; } @@ -1117,44 +1175,42 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; - bool IsNSStringLiteral = false; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to // reduce the overhead) for each FormatToken, which is a string, so that we // don't run multiple checks here on the hot path. - if (Text.startswith("\"") && Current.Previous && - Current.Previous->is(tok::at)) { - IsNSStringLiteral = true; - Prefix = "@\""; - } if ((Text.endswith(Postfix = "\"") && - (IsNSStringLiteral || Text.startswith(Prefix = "\"") || + (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral( - Current, State.Line->Level, StartColumn, Prefix, Postfix, - State.Line->InPPDirective, Encoding, Style)); + Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, + Postfix, State.Line->InPPDirective, + Encoding, Style)); } else { return 0; } } else if (Current.is(TT_BlockComment)) { if (!Current.isTrailingComment() || !Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + // If a comment token switches formatting, like + // /* 
clang-format on */, we don't want to break it further, + // but we may still want to adjust its indentation. + switchesFormatting(Current)) return addMultilineToken(Current, State); Token.reset(new BreakableBlockComment( - Current, State.Line->Level, StartColumn, Current.OriginalColumn, - !Current.Previous, State.Line->InPPDirective, Encoding, Style)); + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + State.Line->InPPDirective, Encoding, Style)); } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + CommentPragmasRegex.match(Current.TokenText.substr(2)) || + switchesFormatting(Current)) return 0; - Token.reset(new BreakableLineComment(Current, State.Line->Level, - StartColumn, /*InPPDirective=*/false, - Encoding, Style)); + Token.reset(new BreakableLineCommentSection( + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + /*InPPDirective=*/false, Encoding, Style)); // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; } else { @@ -1165,18 +1221,30 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; bool BreakInserted = false; + // We use a conservative reflowing strategy. Reflow starts after a line is + // broken or the corresponding whitespace compressed. Reflow ends as soon as a + // line that doesn't get reflown with the previous line is reached. 
+ bool ReflowInProgress = false; unsigned Penalty = 0; unsigned RemainingTokenColumns = 0; for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { + BreakableToken::Split SplitBefore(StringRef::npos, 0); + if (ReflowInProgress) { + SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, CommentPragmasRegex); + } + ReflowInProgress = SplitBefore.first != StringRef::npos; + unsigned TailOffset = + ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, Whitespaces); - unsigned TailOffset = 0; - RemainingTokenColumns = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, SplitBefore, Whitespaces); + RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); while (RemainingTokenColumns > RemainingSpace) { - BreakableToken::Split Split = - Token->getSplit(LineIndex, TailOffset, ColumnLimit); + BreakableToken::Split Split = Token->getSplit( + LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); if (Split.first == StringRef::npos) { // The last line's penalty is handled in addNextStateToQueue(). if (LineIndex < EndIndex - 1) @@ -1185,17 +1253,23 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, break; } assert(Split.first != 0); - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); - // We can remove extra whitespace instead of breaking the line. - if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) { - RemainingTokenColumns = 0; + // Check if compressing the whitespace range will bring the line length + // under the limit. 
If that is the case, we perform whitespace compression + // instead of inserting a line break. + unsigned RemainingTokenColumnsAfterCompression = + Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); + if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { + RemainingTokenColumns = RemainingTokenColumnsAfterCompression; + ReflowInProgress = true; if (!DryRun) - Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces); + Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); break; } + unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. @@ -1213,6 +1287,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; + ReflowInProgress = true; BreakInserted = true; } } @@ -1233,6 +1308,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, State.Stack.back().LastSpace = StartColumn; } + + Token->updateNextToken(State); + return Penalty; } diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 21ad653c4fa4c..9a06aa6f62672 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -146,12 +146,12 @@ private: }; struct ParenState { - ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, - bool AvoidBinPacking, bool NoLineBreak) - : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), - NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), - AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), LastOperatorWrapped(true), + ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, + bool 
NoLineBreak) + : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent), + BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking), + BreakBeforeParameter(false), NoLineBreak(NoLineBreak), + NoLineBreakInOperand(false), LastOperatorWrapped(true), ContainsLineBreak(false), ContainsUnwrappedBuilder(false), AlignColons(true), ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} @@ -160,9 +160,6 @@ struct ParenState { /// indented. unsigned Indent; - /// \brief The number of indentation levels of the block. - unsigned IndentLevel; - /// \brief The position of the last space on each level. /// /// Used e.g. to break like: @@ -224,6 +221,10 @@ struct ParenState { /// \brief Line breaking in this context would break a formatting rule. bool NoLineBreak : 1; + /// \brief Same as \c NoLineBreak, but is restricted until the end of the + /// operand (including the next ","). + bool NoLineBreakInOperand : 1; + /// \brief True if the last binary operator on this level was wrapped to the /// next line. 
bool LastOperatorWrapped : 1; diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 389761d482498..0e2da71343d5d 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -17,6 +17,7 @@ #include "AffectedRangeManager.h" #include "ContinuationIndenter.h" #include "FormatTokenLexer.h" +#include "NamespaceEndCommentsFixer.h" #include "SortJavaScriptImports.h" #include "TokenAnalyzer.h" #include "TokenAnnotator.h" @@ -297,6 +298,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals); IO.mapOptional("ColumnLimit", Style.ColumnLimit); IO.mapOptional("CommentPragmas", Style.CommentPragmas); + IO.mapOptional("BreakBeforeInheritanceComma", + Style.BreakBeforeInheritanceComma); IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", Style.ConstructorInitializerAllOnOneLineOrOnePerLine); IO.mapOptional("ConstructorInitializerIndentWidth", @@ -307,6 +310,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("DisableFormat", Style.DisableFormat); IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); + IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); IO.mapOptional("ForEachMacros", Style.ForEachMacros); IO.mapOptional("IncludeCategories", Style.IncludeCategories); IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex); @@ -421,6 +425,11 @@ std::error_code make_error_code(ParseError e) { return std::error_code(static_cast<int>(e), getParseCategory()); } +inline llvm::Error make_string_error(const llvm::Twine &Message) { + return llvm::make_error<llvm::StringError>(Message, + llvm::inconvertibleErrorCode()); +} + const char *ParseErrorCategory::name() const noexcept { return "clang-format.parse_error"; } @@ -514,6 +523,7 @@ FormatStyle getLLVMStyle() { false, false, false, false, false}; LLVMStyle.BreakAfterJavaFieldAnnotations = false; LLVMStyle.BreakConstructorInitializersBeforeComma = false; + 
LLVMStyle.BreakBeforeInheritanceComma = false; LLVMStyle.BreakStringLiterals = true; LLVMStyle.ColumnLimit = 80; LLVMStyle.CommentPragmas = "^ IWYU pragma:"; @@ -523,6 +533,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.Cpp11BracedListStyle = true; LLVMStyle.DerivePointerAlignment = false; LLVMStyle.ExperimentalAutoDetectBinPacking = false; + LLVMStyle.FixNamespaceComments = true; LLVMStyle.ForEachMacros.push_back("foreach"); LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); @@ -546,7 +557,6 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; - LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -614,8 +624,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; - GoogleStyle.CommentPragmas = - "(taze:|@(export|requirecss|return|returns|see|visibility)) "; + // taze:, @tag followed by { for a lot of JSDoc tags, and @see, which is + // commonly followed by overlong URLs. 
+ GoogleStyle.CommentPragmas = "(taze:|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)"; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; @@ -648,8 +659,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.AllowShortLoopsOnASingleLine = false; ChromiumStyle.BinPackParameters = false; ChromiumStyle.DerivePointerAlignment = false; + if (Language == FormatStyle::LK_ObjC) + ChromiumStyle.ColumnLimit = 80; } - ChromiumStyle.SortIncludes = false; return ChromiumStyle; } @@ -666,9 +678,11 @@ FormatStyle getMozillaStyle() { MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializersBeforeComma = true; + MozillaStyle.BreakBeforeInheritanceComma = true; MozillaStyle.ConstructorInitializerIndentWidth = 2; MozillaStyle.ContinuationIndentWidth = 2; MozillaStyle.Cpp11BracedListStyle = false; + MozillaStyle.FixNamespaceComments = false; MozillaStyle.IndentCaseLabels = true; MozillaStyle.ObjCSpaceAfterProperty = true; MozillaStyle.ObjCSpaceBeforeProtocolList = false; @@ -689,6 +703,7 @@ FormatStyle getWebKitStyle() { Style.BreakConstructorInitializersBeforeComma = true; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; + Style.FixNamespaceComments = false; Style.IndentWidth = 4; Style.NamespaceIndentation = FormatStyle::NI_Inner; Style.ObjCBlockIndentWidth = 4; @@ -706,6 +721,7 @@ FormatStyle getGNUStyle() { Style.BreakBeforeTernaryOperators = true; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 79; + Style.FixNamespaceComments = false; Style.SpaceBeforeParens = FormatStyle::SBPO_Always; Style.Standard = FormatStyle::LS_Cpp03; return Style; @@ -1457,12 +1473,22 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, return Replaces; } +bool isMpegTS(StringRef Code) { + // MPEG transport streams use the ".ts" file extension. 
clang-format should + // not attempt to format those. MPEG TS' frame format starts with 0x47 every + // 189 bytes - detect that and return. + return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47; +} + tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName, unsigned *Cursor) { tooling::Replacements Replaces; if (!Style.SortIncludes) return Replaces; + if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript && + isMpegTS(Code)) + return Replaces; if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript) return sortJavaScriptImports(Style, Code, Ranges, FileName); sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor); @@ -1531,8 +1557,8 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) { // tokens and returns an offset after the sequence. unsigned getOffsetAfterTokenSequence( StringRef FileName, StringRef Code, const FormatStyle &Style, - std::function<unsigned(const SourceManager &, Lexer &, Token &)> - GetOffsetAfterSequense) { + llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> + GetOffsetAfterSequence) { std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); const SourceManager &SourceMgr = Env->getSourceManager(); @@ -1541,7 +1567,7 @@ unsigned getOffsetAfterTokenSequence( Token Tok; // Get the first token. Lex.LexFromRawLexer(Tok); - return GetOffsetAfterSequense(SourceMgr, Lex, Tok); + return GetOffsetAfterSequence(SourceMgr, Lex, Tok); } // Check if a sequence of tokens is like "#<Name> <raw_identifier>". 
If it is, @@ -1645,7 +1671,7 @@ bool isDeletedHeader(llvm::StringRef HeaderName, tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, const FormatStyle &Style) { - if (Style.Language != FormatStyle::LanguageKind::LK_Cpp) + if (!Style.isCpp()) return Replaces; tooling::Replacements HeaderInsertions; @@ -1808,23 +1834,36 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) return tooling::Replacements(); - + if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) + return tooling::Replacements(); auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); - if (Style.Language == FormatStyle::LK_JavaScript && - Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { - JavaScriptRequoter Requoter(*Env, Expanded); - tooling::Replacements Requotes = Requoter.process(); - if (!Requotes.empty()) { - auto NewCode = applyAllReplacements(Code, Requotes); + auto reformatAfterApplying = [&] (TokenAnalyzer& Fixer) { + tooling::Replacements Fixes = Fixer.process(); + if (!Fixes.empty()) { + auto NewCode = applyAllReplacements(Code, Fixes); if (NewCode) { auto NewEnv = Environment::CreateVirtualEnvironment( *NewCode, FileName, - tooling::calculateRangesAfterReplacements(Requotes, Ranges)); + tooling::calculateRangesAfterReplacements(Fixes, Ranges)); Formatter Format(*NewEnv, Expanded, IncompleteFormat); - return Requotes.merge(Format.process()); + return Fixes.merge(Format.process()); } } + Formatter Format(*Env, Expanded, IncompleteFormat); + return Format.process(); + }; + + if (Style.Language == FormatStyle::LK_Cpp && + Style.FixNamespaceComments) { + NamespaceEndCommentsFixer CommentsFixer(*Env, Expanded); + return reformatAfterApplying(CommentsFixer); + } + + if (Style.Language == FormatStyle::LK_JavaScript && + Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { + JavaScriptRequoter Requoter(*Env, 
Expanded); + return reformatAfterApplying(Requoter); } Formatter Format(*Env, Expanded, IncompleteFormat); @@ -1840,13 +1879,24 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, return Clean.process(); } +tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, + StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + NamespaceEndCommentsFixer Fix(*Env, Style); + return Fix.process(); +} + LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions LangOpts; LangOpts.CPlusPlus = 1; LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; - bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; + bool AlternativeOperators = Style.isCpp(); LangOpts.CXXOperatorNames = AlternativeOperators ? 
1 : 0; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; @@ -1882,9 +1932,9 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Cpp; } -FormatStyle getStyle(StringRef StyleName, StringRef FileName, - StringRef FallbackStyle, StringRef Code, - vfs::FileSystem *FS) { +llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, + StringRef FallbackStyleName, + StringRef Code, vfs::FileSystem *FS) { if (!FS) { FS = vfs::getRealFileSystem().get(); } @@ -1898,35 +1948,28 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, (Code.contains("\n- (") || Code.contains("\n+ ("))) Style.Language = FormatStyle::LK_ObjC; - if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { - llvm::errs() << "Invalid fallback style \"" << FallbackStyle - << "\" using LLVM style\n"; - return Style; - } + FormatStyle FallbackStyle = getNoStyle(); + if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) + return make_string_error("Invalid fallback style \"" + FallbackStyleName); if (StyleName.startswith("{")) { // Parse YAML/JSON style from the command line. - if (std::error_code ec = parseConfiguration(StyleName, &Style)) { - llvm::errs() << "Error parsing -style: " << ec.message() << ", using " - << FallbackStyle << " style\n"; - } + if (std::error_code ec = parseConfiguration(StyleName, &Style)) + return make_string_error("Error parsing -style: " + ec.message()); return Style; } if (!StyleName.equals_lower("file")) { if (!getPredefinedStyle(StyleName, Style.Language, &Style)) - llvm::errs() << "Invalid value for -style, using " << FallbackStyle - << " style\n"; + return make_string_error("Invalid value for -style"); return Style; } // Look for .clang-format/_clang-format file in the file's parent directories. 
SmallString<128> UnsuitableConfigFiles; SmallString<128> Path(FileName); - if (std::error_code EC = FS->makeAbsolute(Path)) { - llvm::errs() << EC.message() << "\n"; - return Style; - } + if (std::error_code EC = FS->makeAbsolute(Path)) + return make_string_error(EC.message()); for (StringRef Directory = Path; !Directory.empty(); Directory = llvm::sys::path::parent_path(Directory)) { @@ -1943,25 +1986,23 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); - bool IsFile = + bool FoundConfigFile = Status && (Status->getType() == llvm::sys::fs::file_type::regular_file); - if (!IsFile) { + if (!FoundConfigFile) { // Try _clang-format too, since dotfiles are not commonly used on Windows. ConfigFile = Directory; llvm::sys::path::append(ConfigFile, "_clang-format"); DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); - IsFile = Status && - (Status->getType() == llvm::sys::fs::file_type::regular_file); + FoundConfigFile = Status && (Status->getType() == + llvm::sys::fs::file_type::regular_file); } - if (IsFile) { + if (FoundConfigFile) { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = FS->getBufferForFile(ConfigFile.str()); - if (std::error_code EC = Text.getError()) { - llvm::errs() << EC.message() << "\n"; - break; - } + if (std::error_code EC = Text.getError()) + return make_string_error(EC.message()); if (std::error_code ec = parseConfiguration(Text.get()->getBuffer(), &Style)) { if (ec == ParseError::Unsuitable) { @@ -1970,20 +2011,18 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, UnsuitableConfigFiles.append(ConfigFile); continue; } - llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message() - << "\n"; - break; + return make_string_error("Error reading " + ConfigFile + ": " + + ec.message()); } DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); 
return Style; } } - if (!UnsuitableConfigFiles.empty()) { - llvm::errs() << "Configuration file(s) do(es) not support " - << getLanguageName(Style.Language) << ": " - << UnsuitableConfigFiles << "\n"; - } - return Style; + if (!UnsuitableConfigFiles.empty()) + return make_string_error("Configuration file(s) do(es) not support " + + getLanguageName(Style.Language) + ": " + + UnsuitableConfigFiles); + return FallbackStyle; } } // namespace format diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index ea3bbe368d5b0..c9649126d93f5 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -48,11 +48,13 @@ namespace format { TYPE(FunctionTypeLParen) \ TYPE(ImplicitStringLiteral) \ TYPE(InheritanceColon) \ + TYPE(InheritanceComma) \ TYPE(InlineASMBrace) \ TYPE(InlineASMColon) \ TYPE(JavaAnnotation) \ TYPE(JsComputedPropertyName) \ TYPE(JsFatArrow) \ + TYPE(JsNonNullAssertion) \ TYPE(JsTypeColon) \ TYPE(JsTypeOperator) \ TYPE(JsTypeOptionalQuestion) \ @@ -220,6 +222,9 @@ struct FormatToken { /// [], {} or <>. unsigned NestingLevel = 0; + /// \brief The indent level of this token. Copied from the surrounding line. + unsigned IndentLevel = 0; + /// \brief Penalty for inserting a line break before this token. unsigned SplitPenalty = 0; @@ -258,6 +263,11 @@ struct FormatToken { /// Only set if \c Type == \c TT_StartOfName. bool PartOfMultiVariableDeclStmt = false; + /// \brief Does this line comment continue a line comment section? + /// + /// Only set to true if \c Type == \c TT_LineComment. + bool ContinuesLineCommentSection = false; + /// \brief If this is a bracket, this points to the matching one. FormatToken *MatchingParen = nullptr; @@ -334,11 +344,15 @@ struct FormatToken { /// \brief Returns whether \p Tok is ([{ or a template opening <. 
bool opensScope() const { + if (is(TT_TemplateString) && TokenText.endswith("${")) + return true; return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, TT_TemplateOpener); } /// \brief Returns whether \p Tok is )]} or a template closing >. bool closesScope() const { + if (is(TT_TemplateString) && TokenText.startswith("}")) + return true; return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, TT_TemplateCloser); } @@ -443,6 +457,8 @@ struct FormatToken { /// \brief Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { + if (is(TT_TemplateString) && opensScope()) + return true; return is(TT_ArrayInitializerLSquare) || (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || @@ -451,6 +467,8 @@ struct FormatToken { /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { + if (is(TT_TemplateString) && closesScope()) + return true; return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); } @@ -618,6 +636,8 @@ struct AdditionalKeywords { kw_synchronized = &IdentTable.get("synchronized"); kw_throws = &IdentTable.get("throws"); kw___except = &IdentTable.get("__except"); + kw___has_include = &IdentTable.get("__has_include"); + kw___has_include_next = &IdentTable.get("__has_include_next"); kw_mark = &IdentTable.get("mark"); @@ -644,6 +664,8 @@ struct AdditionalKeywords { IdentifierInfo *kw_NS_ENUM; IdentifierInfo *kw_NS_OPTIONS; IdentifierInfo *kw___except; + IdentifierInfo *kw___has_include; + IdentifierInfo *kw___has_include_next; // JavaScript keywords. 
IdentifierInfo *kw_as; diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 46a32a917dd93..4ee43d6937e00 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -64,6 +64,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeLessLess()) return; + if (tryMergeNSStringLiteral()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; @@ -82,6 +84,35 @@ void FormatTokenLexer::tryMergePreviousTokens() { if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) return; } + + if (Style.Language == FormatStyle::LK_Java) { + static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater, + tok::greater, + tok::greater}; + static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater, + tok::greater, + tok::greaterequal}; + if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator)) + return; + if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) + return; + } +} + +bool FormatTokenLexer::tryMergeNSStringLiteral() { + if (Tokens.size() < 2) + return false; + auto &At = *(Tokens.end() - 2); + auto &String = *(Tokens.end() - 1); + if (!At->is(tok::at) || !String->is(tok::string_literal)) + return false; + At->Tok.setKind(tok::string_literal); + At->TokenText = StringRef(At->TokenText.begin(), + String->TokenText.end() - At->TokenText.begin()); + At->ColumnWidth += String->ColumnWidth; + At->Type = TT_ObjCStringLiteral; + Tokens.erase(Tokens.end() - 1); + return true; } bool FormatTokenLexer::tryMergeLessLess() { @@ -157,7 +188,9 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { // postfix unary operators. If the '++' is followed by a non-operand // introducing token, the slash here is the operand and not the start of a // regex. 
- if (Prev->isOneOf(tok::plusplus, tok::minusminus)) + // `!` is an unary prefix operator, but also a post-fix operator that casts + // away nullability, so the same check applies. + if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim)) return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3])); // The previous token must introduce an operand location where regex @@ -558,8 +591,7 @@ FormatToken *FormatTokenLexer::getNextToken() { Column = FormatTok->LastLineColumnWidth; } - if (Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC) { + if (Style.isCpp()) { if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index c47b0e725d366..bf10f09cd11e1 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -47,6 +47,7 @@ private: void tryMergePreviousTokens(); bool tryMergeLessLess(); + bool tryMergeNSStringLiteral(); bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp new file mode 100644 index 0000000000000..88cf123c18990 --- /dev/null +++ b/lib/Format/NamespaceEndCommentsFixer.cpp @@ -0,0 +1,175 @@ +//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that +/// fixes namespace end comments. 
+/// +//===----------------------------------------------------------------------===// + +#include "NamespaceEndCommentsFixer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" + +#define DEBUG_TYPE "namespace-end-comments-fixer" + +namespace clang { +namespace format { + +namespace { +// The maximal number of unwrapped lines that a short namespace spans. +// Short namespaces don't need an end comment. +static const int kShortNamespaceMaxLines = 1; + +// Matches a valid namespace end comment. +// Valid namespace end comments don't need to be edited. +static llvm::Regex kNamespaceCommentPattern = + llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" + "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", + llvm::Regex::IgnoreCase); + +// Computes the name of a namespace given the namespace token. +// Returns "" for anonymous namespace. +std::string computeName(const FormatToken *NamespaceTok) { + assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) && + "expecting a namespace token"); + std::string name = ""; + // Collects all the non-comment tokens between 'namespace' and '{'. + const FormatToken *Tok = NamespaceTok->getNextNonComment(); + while (Tok && !Tok->is(tok::l_brace)) { + name += Tok->TokenText; + Tok = Tok->getNextNonComment(); + } + return name; +} + +std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) { + std::string text = "// namespace"; + if (!NamespaceName.empty()) { + text += ' '; + text += NamespaceName; + } + if (AddNewline) + text += '\n'; + return text; +} + +bool hasEndComment(const FormatToken *RBraceTok) { + return RBraceTok->Next && RBraceTok->Next->is(tok::comment); +} + +bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) { + assert(hasEndComment(RBraceTok)); + const FormatToken *Comment = RBraceTok->Next; + SmallVector<StringRef, 7> Groups; + if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) { + StringRef NamespaceNameInComment = Groups.size() > 5 ? 
Groups[5] : ""; + // Anonymous namespace comments must not mention a namespace name. + if (NamespaceName.empty() && !NamespaceNameInComment.empty()) + return false; + StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; + // Named namespace comments must not mention anonymous namespace. + if (!NamespaceName.empty() && !AnonymousInComment.empty()) + return false; + return NamespaceNameInComment == NamespaceName; + } + return false; +} + +void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, + const SourceManager &SourceMgr, + tooling::Replacements *Fixes) { + auto EndLoc = RBraceTok->Tok.getEndLoc(); + auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); + if (Err) { + llvm::errs() << "Error while adding namespace end comment: " + << llvm::toString(std::move(Err)) << "\n"; + } +} + +void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, + const SourceManager &SourceMgr, + tooling::Replacements *Fixes) { + assert(hasEndComment(RBraceTok)); + const FormatToken *Comment = RBraceTok->Next; + auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), + Comment->Tok.getEndLoc()); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); + if (Err) { + llvm::errs() << "Error while updating namespace end comment: " + << llvm::toString(std::move(Err)) << "\n"; + } +} +} // namespace + +NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, + const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + +tooling::Replacements NamespaceEndCommentsFixer::analyze( + TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) { + const SourceManager &SourceMgr = Env.getSourceManager(); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), + AnnotatedLines.end()); + tooling::Replacements Fixes; + for (size_t I = 0, E = 
AnnotatedLines.size(); I != E; ++I) { + if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective || + !AnnotatedLines[I]->startsWith(tok::r_brace)) + continue; + const AnnotatedLine *EndLine = AnnotatedLines[I]; + size_t StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; + if (StartLineIndex == UnwrappedLine::kInvalidIndex) + continue; + assert(StartLineIndex < E); + const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; + // Detect "(inline)? namespace" in the beginning of a line. + if (NamespaceTok->is(tok::kw_inline)) + NamespaceTok = NamespaceTok->getNextNonComment(); + if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace)) + continue; + FormatToken *RBraceTok = EndLine->First; + if (RBraceTok->Finalized) + continue; + RBraceTok->Finalized = true; + const FormatToken *EndCommentPrevTok = RBraceTok; + // Namespaces often end with '};'. In that case, attach namespace end + // comments to the semicolon tokens. + if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) { + EndCommentPrevTok = RBraceTok->Next; + } + // The next token in the token stream after the place where the end comment + // token must be. This is either the next token on the current line or the + // first token on the next line. 
+ const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; + if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) + EndCommentNextTok = EndCommentNextTok->Next; + if (!EndCommentNextTok && I + 1 < E) + EndCommentNextTok = AnnotatedLines[I + 1]->First; + bool AddNewline = EndCommentNextTok && + EndCommentNextTok->NewlinesBefore == 0 && + EndCommentNextTok->isNot(tok::eof); + const std::string NamespaceName = computeName(NamespaceTok); + const std::string EndCommentText = + computeEndCommentText(NamespaceName, AddNewline); + if (!hasEndComment(EndCommentPrevTok)) { + bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1; + if (!isShort) + addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); + continue; + } + if (!validEndComment(EndCommentPrevTok, NamespaceName)) + updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); + } + return Fixes; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h new file mode 100644 index 0000000000000..7790668a2e829 --- /dev/null +++ b/lib/Format/NamespaceEndCommentsFixer.h @@ -0,0 +1,37 @@ +//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that +/// fixes namespace end comments. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H +#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +class NamespaceEndCommentsFixer : public TokenAnalyzer { +public: + NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); + + tooling::Replacements + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override; +}; + +} // end namespace format +} // end namespace clang + +#endif diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index b5f7de280acd7..004800fc2a4e0 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -311,14 +311,13 @@ private: // In C++, this can happen either in array of templates (foo<int>[10]) // or when array is a nested template type (unique_ptr<type1<type2>[]>). 
bool CppArrayTemplates = - Style.Language == FormatStyle::LK_Cpp && Parent && + Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) && (Contexts.back().CanBeExpression || Contexts.back().IsExpression || Contexts.back().InTemplateArgument); bool StartsObjCMethodExpr = - !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC) && + !CppArrayTemplates && Style.isCpp() && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && CurrentToken->isNot(tok::l_brace) && (!Parent || @@ -337,6 +336,9 @@ private: Contexts.back().ContextKind == tok::l_brace && Parent->isOneOf(tok::l_brace, tok::comma)) { Left->Type = TT_JsComputedPropertyName; + } else if (CurrentToken->is(tok::r_square) && Parent && + Parent->is(TT_TemplateCloser)) { + Left->Type = TT_ArraySubscriptLSquare; } else if (Style.Language == FormatStyle::LK_Proto || (!CppArrayTemplates && Parent && Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, @@ -433,9 +435,7 @@ private: if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && - (!Contexts.back().ColonIsDictLiteral || - (Style.Language != FormatStyle::LK_Cpp && - Style.Language != FormatStyle::LK_ObjC))) || + (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || Style.Language == FormatStyle::LK_Proto) && (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal))) @@ -676,6 +676,8 @@ private: case tok::comma: if (Contexts.back().InCtorInitializer) Tok->Type = TT_CtorInitializerComma; + else if (Contexts.back().InInheritanceList) + Tok->Type = TT_InheritanceComma; else if (Contexts.back().FirstStartOfName && (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; @@ -684,6 +686,12 @@ private: if (Contexts.back().IsForEachMacro) Contexts.back().IsExpression = true; break; + case tok::identifier: + if 
(Tok->isOneOf(Keywords.kw___has_include, + Keywords.kw___has_include_next)) { + parseHasInclude(); + } + break; default: break; } @@ -727,6 +735,14 @@ private: } } + void parseHasInclude() { + if (!CurrentToken || !CurrentToken->is(tok::l_paren)) + return; + next(); // '(' + parseIncludeDirective(); + next(); // ')' + } + LineType parsePreprocessorDirective() { bool IsFirstToken = CurrentToken->IsFirst; LineType Type = LT_PreprocessorDirective; @@ -777,8 +793,14 @@ private: default: break; } - while (CurrentToken) + while (CurrentToken) { + FormatToken *Tok = CurrentToken; next(); + if (Tok->isOneOf(Keywords.kw___has_include, + Keywords.kw___has_include_next)) { + parseHasInclude(); + } + } return Type; } @@ -885,7 +907,7 @@ private: TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_OverloadedOperator, TT_RegexLiteral, - TT_TemplateString)) + TT_TemplateString, TT_ObjCStringLiteral)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -925,6 +947,7 @@ private: bool CanBeExpression = true; bool InTemplateArgument = false; bool InCtorInitializer = false; + bool InInheritanceList = false; bool CaretFound = false; bool IsForEachMacro = false; }; @@ -984,6 +1007,9 @@ private: Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; Contexts.back().InCtorInitializer = true; + } else if (Current.Previous && + Current.Previous->is(TT_InheritanceColon)) { + Contexts.back().InInheritanceList = true; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); @@ -1004,6 +1030,23 @@ private: // The token type is already known. 
return; + if (Style.Language == FormatStyle::LK_JavaScript) { + if (Current.is(tok::exclaim)) { + if (Current.Previous && + (Current.Previous->isOneOf(tok::identifier, tok::r_paren, + tok::r_square, tok::r_brace) || + Current.Previous->Tok.isLiteral())) { + Current.Type = TT_JsNonNullAssertion; + return; + } + if (Current.Next && + Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { + Current.Type = TT_JsNonNullAssertion; + return; + } + } + } + // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. In this case, 'Current' is a // trailing token of this declaration and thus cannot be a name. @@ -1063,7 +1106,8 @@ private: if (Current.MatchingParen && Current.Next && !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, - tok::period, tok::arrow, tok::coloncolon)) + tok::comma, tok::period, tok::arrow, + tok::coloncolon)) if (FormatToken *AfterParen = Current.MatchingParen->Next) { // Make sure this isn't the return type of an Obj-C block declaration if (AfterParen->Tok.isNot(tok::caret)) { @@ -1076,21 +1120,17 @@ private: } } } else if (Current.is(tok::at) && Current.Next) { - if (Current.Next->isStringLiteral()) { - Current.Type = TT_ObjCStringLiteral; - } else { - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } + switch (Current.Next->Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; } } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = Current.getPreviousNonComment(); @@ -1137,16 +1177,17 @@ private: if (Tok.isNot(tok::identifier) 
|| !Tok.Previous) return false; - if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof)) + if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, + Keywords.kw_as)) return false; if (Style.Language == FormatStyle::LK_JavaScript && Tok.Previous->is(Keywords.kw_in)) return false; // Skip "const" as it does not have an influence on whether this is a name. - FormatToken *PreviousNotConst = Tok.Previous; + FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) - PreviousNotConst = PreviousNotConst->Previous; + PreviousNotConst = PreviousNotConst->getPreviousNonComment(); if (!PreviousNotConst) return false; @@ -1175,9 +1216,7 @@ private: /// \brief Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++ and Java. - if (Style.Language != FormatStyle::LK_Cpp && - Style.Language != FormatStyle::LK_ObjC && - Style.Language != FormatStyle::LK_Java) + if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) return false; // Empty parens aren't casts and there are no casts at the end of the line. @@ -1282,7 +1321,8 @@ private: return TT_UnaryOperator; const FormatToken *NextToken = Tok.getNextNonComment(); - if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) || + if (!NextToken || + NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) return TT_PointerOrReference; @@ -1445,7 +1485,9 @@ public: // At the end of the line or when an operator with higher precedence is // found, insert fake parenthesis and return. 
- if (!Current || (Current->closesScope() && Current->MatchingParen) || + if (!Current || + (Current->closesScope() && + (Current->MatchingParen || Current->is(TT_TemplateString))) || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || (CurrentPrecedence == prec::Conditional && Precedence == prec::Assignment && Current->is(tok::colon))) { @@ -1454,7 +1496,9 @@ public: // Consume scopes: (), [], <> and {} if (Current->opensScope()) { - while (Current && !Current->closesScope()) { + // In fragment of a JavaScript template string can look like '}..${' and + // thus close a scope and open a new one at the same time. + while (Current && (!Current->closesScope() || Current->opensScope())) { next(); parse(); } @@ -1493,13 +1537,14 @@ private: return prec::Conditional; if (NextNonComment && NextNonComment->is(tok::colon) && NextNonComment->is(TT_DictLiteral)) - return prec::Comma; + return prec::Assignment; + if (Current->is(TT_JsComputedPropertyName)) + return prec::Assignment; if (Current->is(TT_LambdaArrow)) return prec::Comma; if (Current->is(TT_JsFatArrow)) return prec::Assignment; - if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName, - TT_JsComputedPropertyName) || + if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || (Current->is(tok::comment) && NextNonComment && NextNonComment->is(TT_SelectorName))) return 0; @@ -1510,7 +1555,7 @@ private: Current->is(Keywords.kw_instanceof)) return prec::Relational; if (Style.Language == FormatStyle::LK_JavaScript && - Current->is(Keywords.kw_in)) + Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) return prec::Relational; if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) return Current->getPrecedence(); @@ -1594,8 +1639,14 @@ void TokenAnnotator::setCommentLineLevels( for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) { - if (NextNonCommentLine && (*I)->First->is(tok::comment) && - (*I)->First->Next == nullptr) + 
bool CommentLine = (*I)->First; + for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) { + if (!Tok->is(tok::comment)) { + CommentLine = false; + break; + } + } + if (NextNonCommentLine && CommentLine) (*I)->Level = NextNonCommentLine->Level; else NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; @@ -1697,7 +1748,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current, } } - // Check whether parameter list can be long to a function declaration. + // Check whether parameter list can belong to a function declaration. if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) return false; // If the lines ends with "{", this is likely an function definition. @@ -1711,6 +1762,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current, return true; for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { + if (Tok->is(tok::l_paren) && Tok->MatchingParen) { + Tok = Tok->MatchingParen; + continue; + } if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) return true; @@ -1753,8 +1808,6 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Line.First->TotalLength = Line.First->IsMultiline ? 
Style.ColumnLimit : Line.First->ColumnWidth; - if (!Line.First->Next) - return; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { @@ -1830,9 +1883,18 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } calculateUnbreakableTailLengths(Line); + unsigned IndentLevel = Line.Level; for (Current = Line.First; Current != nullptr; Current = Current->Next) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); + if (Current->MatchingParen && + Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { + assert(IndentLevel > 0); + --IndentLevel; + } + Current->IndentLevel = IndentLevel; + if (Current->opensBlockOrBlockTypeList(Style)) + ++IndentLevel; } DEBUG({ printDebugInfo(Line); }); @@ -1910,7 +1972,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(TT_LambdaArrow)) return 110; if (Left.is(tok::equal) && Right.is(tok::l_brace)) - return 150; + return 160; if (Left.is(TT_CastRParen)) return 100; if (Left.is(tok::coloncolon) || @@ -2167,7 +2229,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Left = *Right.Previous; if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. 
- if (Style.Language == FormatStyle::LK_Cpp) { + if (Style.isCpp()) { if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); } else if (Style.Language == FormatStyle::LK_Proto) { @@ -2181,6 +2243,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; + if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && + Right.MatchingParen) { + const FormatToken *Next = Right.MatchingParen->getNextNonComment(); + // An async arrow function, for example: `x = async () => foo();`, + // as opposed to calling a function called async: `x = async();` + if (Next && Next->is(TT_JsFatArrow)) + return true; + } if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) return false; @@ -2196,8 +2266,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Right.is(tok::l_paren) && Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) return false; - if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, - Keywords.kw_of, tok::kw_const) && + if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, + tok::kw_const) || + // "of" is only a keyword if it appears after another identifier + // (e.g. as "const x of y" in a for loop). + (Left.is(Keywords.kw_of) && Left.Previous && + Left.Previous->Tok.getIdentifierInfo())) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && @@ -2227,12 +2301,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // locations that should have whitespace following are identified by the // above set of follower tokens. return false; - // Postfix non-null assertion operator, as in `foo!.bar()`. 
- if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren, - tok::r_square, tok::r_brace) || - Left.Tok.isLiteral())) + if (Right.is(TT_JsNonNullAssertion)) return false; - if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as)) + if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as)) return true; // "x! as string" } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) @@ -2302,12 +2373,16 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (!Style.SpaceBeforeAssignmentOperators && Right.getPrecedence() == prec::Assignment) return false; + if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) + // Generally don't remove existing spaces between an identifier and "::". + // The identifier might actually be a macro name such as ALWAYS_INLINE. If + // this turns out to be too lenient, add analysis of the identifier itself. + return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) return (Left.is(TT_TemplateOpener) && Style.Standard == FormatStyle::LS_Cpp03) || - !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren, - tok::l_square) || - Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener)); + !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::kw___super, TT_TemplateCloser, TT_TemplateOpener)); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || @@ -2375,6 +2450,11 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && Right.Next->is(tok::string_literal)) return true; + } else if (Style.Language == FormatStyle::LK_Cpp || + Style.Language == FormatStyle::LK_ObjC || + Style.Language == FormatStyle::LK_Proto) { + if (Left.isStringLiteral() && Right.isStringLiteral()) + 
return true; } // If the last token before a '}' is a comma or a trailing comment, the @@ -2398,9 +2478,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); if (Left.isTrailingComment()) return true; - if (Left.isStringLiteral() && - (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral))) - return true; if (Right.Previous->IsUnterminatedLiteral) return true; if (Right.is(tok::lessless) && Right.Next && @@ -2416,6 +2493,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Style.BreakConstructorInitializersBeforeComma && !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) return true; + // Break only if we have multiple inheritance. + if (Style.BreakBeforeInheritanceComma && + Right.is(TT_InheritanceComma)) + return true; if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) // Raw string literals are special wrt. line breaks. The author has made a // deliberate choice and might have aligned the contents of the string @@ -2458,11 +2539,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { const FormatToken *NonComment = Right.getPreviousNonComment(); - if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw) || - (NonComment && - NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw))) + if (NonComment && + NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw, Keywords.kw_interface, + Keywords.kw_type)) return false; // Otherwise a semicolon is inserted. 
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; @@ -2476,6 +2556,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; if (Right.is(Keywords.kw_as)) return false; // must not break before as in 'x as type' casts + if (Left.is(Keywords.kw_as)) + return true; + if (Left.is(TT_JsNonNullAssertion)) + return true; if (Left.is(Keywords.kw_declare) && Right.isOneOf(Keywords.kw_module, tok::kw_namespace, Keywords.kw_function, tok::kw_class, tok::kw_enum, @@ -2485,9 +2569,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 return false; if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && - Right.isOneOf(tok::identifier, tok::string_literal)) { + Right.isOneOf(tok::identifier, tok::string_literal)) return false; // must not break in "module foo { ...}" - } + if (Right.is(TT_TemplateString) && Right.closesScope()) + return false; + if (Left.is(TT_TemplateString) && Left.opensScope()) + return true; } if (Left.is(tok::at)) @@ -2590,6 +2677,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializersBeforeComma) return true; + if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) + return false; + if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) + return true; if ((Left.is(tok::greater) && Right.is(tok::greater)) || (Left.is(tok::less) && Right.is(tok::less))) return false; @@ -2615,7 +2706,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || - (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); + (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || + (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser)); } void 
TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { @@ -2627,6 +2719,7 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { << " T=" << getTokenTypeName(Tok->Type) << " S=" << Tok->SpacesRequiredBefore << " B=" << Tok->BlockParameterCount + << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind << " FakeLParens="; diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 97daaf44ba99e..805509533bf93 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -39,6 +39,7 @@ class AnnotatedLine { public: AnnotatedLine(const UnwrappedLine &Line) : First(Line.Tokens.front().Tok), Level(Line.Level), + MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), @@ -109,6 +110,7 @@ public: LineType Type; unsigned Level; + size_t MatchingOpeningBlockLineIndex; bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; @@ -122,7 +124,7 @@ public: /// input ranges. bool LeadingEmptyLinesAffected; - /// \c True if a one of this line's children intersects with an input range. + /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; private: diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index d7f1c4232d860..c3c154afeb8a3 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -530,34 +530,33 @@ protected: if (Previous.Children[0]->First->MustBreakBefore) return false; - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - // Cannot merge into one line if this line ends on a comment. 
if (Previous.is(tok::comment)) return false; + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + const AnnotatedLine *Child = Previous.Children[0]; // We can't put the closing "}" on a line with a trailing comment. - if (Previous.Children[0]->Last->isTrailingComment()) + if (Child->Last->isTrailingComment()) return false; // If the child line exceeds the column limit, we wouldn't want to merge it. // We add +2 for the trailing " }". if (Style.ColumnLimit > 0 && - Previous.Children[0]->Last->TotalLength + State.Column + 2 > - Style.ColumnLimit) + Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) return false; if (!DryRun) { Whitespaces->replaceWhitespace( - *Previous.Children[0]->First, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun); + Penalty += formatLine(*Child, State.Column + 1, DryRun); - State.Column += 1 + Previous.Children[0]->Last->TotalLength; + State.Column += 1 + Child->Last->TotalLength; return true; } @@ -841,8 +840,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, - TheLine.InPPDirective); + formatFirstToken(TheLine, PreviousLine, Indent); NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); @@ -882,9 +880,8 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, TheLine.LeadingEmptyLinesAffected); // Format the first token. 
if (ReformatLeadingWhitespace) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, - TheLine.First->OriginalColumn, - TheLine.InPPDirective); + formatFirstToken(TheLine, PreviousLine, + TheLine.First->OriginalColumn); else Whitespaces->addUntouchableToken(*TheLine.First, TheLine.InPPDirective); @@ -904,15 +901,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, return Penalty; } -void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, +void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - unsigned IndentLevel, - unsigned Indent, - bool InPPDirective) { + unsigned Indent) { + FormatToken& RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0, - /*Spaces=*/0, /*TargetColumn=*/0); + Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0, + /*StartOfTokenColumn=*/0); return; } unsigned Newlines = @@ -944,9 +940,9 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); - Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, - Indent, InPPDirective && - !RootToken.HasUnescapedNewline); + Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, + Line.InPPDirective && + !RootToken.HasUnescapedNewline); } unsigned diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 7bcead9d25e1a..93247f71d6e0f 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -44,9 +44,8 @@ public: private: /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. 
- void formatFirstToken(FormatToken &RootToken, - const AnnotatedLine *PreviousLine, unsigned IndentLevel, - unsigned Indent, bool InPPDirective); + void formatFirstToken(const AnnotatedLine &Line, + const AnnotatedLine *PreviousLine, unsigned Indent); /// \brief Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. @@ -57,7 +56,8 @@ private: // starting from a specific additional offset. Improves performance if there // are many nested blocks. std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, - unsigned> PenaltyCache; + unsigned> + PenaltyCache; ContinuationIndenter *Indenter; WhitespaceManager *Whitespaces; diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 8fc3b78aee010..5be68ad5c6b82 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -202,7 +202,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), + CurrentLines(&Lines), Style(Style), Keywords(Keywords), + CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { @@ -334,8 +335,11 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { case tok::l_brace: if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && PrevTok->is(tok::colon)) - // In TypeScript's TypeMemberLists, there can be semicolons between the - // individual members. + // A colon indicates this code is in a type, or a braced list following + // a label in an object literal ({a: {b: 1}}). 
+ // The code below could be confused by semicolons between the individual + // members in a type member list, which would normally trigger BK_Block. + // In both cases, this must be parsed as an inline braced init. Tok->BlockKind = BK_BracedInit; else Tok->BlockKind = BK_Unknown; @@ -424,6 +428,8 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, parseParens(); addUnwrappedLine(); + size_t OpeningLineIndex = + Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); @@ -449,6 +455,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (MunchSemi && FormatTok->Tok.is(tok::semi)) nextToken(); Line->Level = InitialLevel; + Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; } static bool isGoogScope(const UnwrappedLine &Line) { @@ -582,13 +589,14 @@ void UnwrappedLineParser::conditionalCompilationEnd() { } void UnwrappedLineParser::parsePPIf(bool IfDef) { + bool IfNDef = FormatTok->is(tok::pp_ifndef); nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - FormatTok->Tok.getLiteralData() != nullptr && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - conditionalCompilationStart(!IfDef && IsLiteralFalse); + bool Unreachable = false; + if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) + Unreachable = true; + if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") + Unreachable = true; + conditionalCompilationStart(Unreachable); parsePPUnknown(); } @@ -746,8 +754,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) return addUnwrappedLine(); - if ((PreviousMustBeValue || Previous->is(tok::r_brace)) && - isJSDeclOrStmt(Keywords, Next)) + if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) return 
addUnwrappedLine(); } @@ -909,7 +916,8 @@ void UnwrappedLineParser::parseStructuralElement() { return; } } - if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, + if (Style.isCpp() && + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); if (FormatTok->is(tok::colon)) { @@ -943,7 +951,7 @@ void UnwrappedLineParser::parseStructuralElement() { if (!parseEnum()) break; // This only applies for C++. - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { addUnwrappedLine(); return; } @@ -1124,7 +1132,7 @@ void UnwrappedLineParser::parseStructuralElement() { } bool UnwrappedLineParser::tryToParseLambda() { - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { nextToken(); return false; } @@ -1298,6 +1306,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { continue; } } + if (FormatTok->is(tok::l_brace)) { + // Could be a method inside of a braced list `{a() { return 1; }}`. + if (tryToParseBracedList()) + continue; + parseChildBlock(); + } } switch (FormatTok->Tok.getKind()) { case tok::caret: @@ -1309,12 +1323,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { case tok::l_square: tryToParseLambda(); break; - case tok::l_brace: - // Assume there are no blocks inside a braced init list apart - // from the ones we explicitly parse out (like lambdas). - FormatTok->BlockKind = BK_BracedInit; - parseBracedList(); - break; case tok::l_paren: parseParens(); // JavaScript can just have free standing methods and getters/setters in @@ -1325,6 +1333,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { break; } break; + case tok::l_brace: + // Assume there are no blocks inside a braced init list apart + // from the ones we explicitly parse out (like lambdas). 
+ FormatTok->BlockKind = BK_BracedInit; + parseBracedList(); + break; case tok::r_brace: nextToken(); return !HasError; @@ -1381,6 +1395,12 @@ void UnwrappedLineParser::parseParens() { if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; + case tok::kw_class: + if (Style.Language == FormatStyle::LK_JavaScript) + parseRecord(/*ParseAsExpr=*/true); + else + nextToken(); + break; case tok::identifier: if (Style.Language == FormatStyle::LK_JavaScript && (FormatTok->is(Keywords.kw_function) || @@ -1722,8 +1742,7 @@ bool UnwrappedLineParser::parseEnum() { nextToken(); // If there are two identifiers in a row, this is likely an elaborate // return type. In Java, this can be "implements", etc. - if (Style.Language == FormatStyle::LK_Cpp && - FormatTok->is(tok::identifier)) + if (Style.isCpp() && FormatTok->is(tok::identifier)) return false; } } @@ -1819,7 +1838,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { addUnwrappedLine(); } -void UnwrappedLineParser::parseRecord() { +void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { const FormatToken &InitialToken = *FormatTok; nextToken(); @@ -1863,11 +1882,15 @@ void UnwrappedLineParser::parseRecord() { } } if (FormatTok->Tok.is(tok::l_brace)) { - if (ShouldBreakBeforeBrace(Style, InitialToken)) - addUnwrappedLine(); + if (ParseAsExpr) { + parseChildBlock(); + } else { + if (ShouldBreakBeforeBrace(Style, InitialToken)) + addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, - /*MunchSemi=*/false); + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. 
Thus, in "class A {} n, m;", @@ -1999,7 +2022,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { - llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; + llvm::dbgs() << I->Tok->Tok.getName() << "[" + << "T=" << I->Tok->Type + << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2024,6 +2049,7 @@ void UnwrappedLineParser::addUnwrappedLine() { }); CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); + Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), @@ -2039,13 +2065,139 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { FormatTok.NewlinesBefore > 0; } +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && + FormatTok.TokenText.startswith("//"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// section on \p Line. +static bool continuesLineComment(const FormatToken &FormatTok, + const UnwrappedLine &Line, + llvm::Regex &CommentPragmasRegex) { + if (Line.Tokens.empty()) + return false; + + StringRef IndentContent = FormatTok.TokenText; + if (FormatTok.TokenText.startswith("//") || + FormatTok.TokenText.startswith("/*")) + IndentContent = FormatTok.TokenText.substr(2); + if (CommentPragmasRegex.match(IndentContent)) + return false; + + // If Line starts with a line comment, then FormatTok continues the comment + // section if its original column is greater or equal to the original start + // column of the line. 
+ // + // Define the min column token of a line as follows: if a line ends in '{' or + // contains a '{' followed by a line comment, then the min column token is + // that '{'. Otherwise, the min column token of the line is the first token of + // the line. + // + // If Line starts with a token other than a line comment, then FormatTok + // continues the comment section if its original column is greater than the + // original start column of the min column token of the line. + // + // For example, the second line comment continues the first in these cases: + // + // // first line + // // second line + // + // and: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // b + // }; + // + // The second line comment doesn't continue the first in these cases: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // }; + const FormatToken *MinColumnToken = Line.Tokens.front().Tok; + + // Scan for '{//'. If found, use the column of '{' as a min column for line + // comment section continuation. + const FormatToken *PreviousToken = nullptr; + for (const UnwrappedLineNode &Node : Line.Tokens) { + if (PreviousToken && PreviousToken->is(tok::l_brace) && + isLineComment(*Node.Tok)) { + MinColumnToken = PreviousToken; + break; + } + PreviousToken = Node.Tok; + + // Grab the last newline preceding a token in this unwrapped line. 
+ if (Node.Tok->NewlinesBefore > 0) { + MinColumnToken = Node.Tok; + } + } + if (PreviousToken && PreviousToken->is(tok::l_brace)) { + MinColumnToken = PreviousToken; + } + + unsigned MinContinueColumn = + MinColumnToken->OriginalColumn + + (isLineComment(*MinColumnToken) ? 0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*(Line.Tokens.back().Tok)) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (SmallVectorImpl<FormatToken *>::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (isOnNewLine(**I) && JustComments) + // Line comments that belong to the same line comment section are put on the + // same line since later we might want to reflow content between them. + // Additional fine-grained breaking of line comment sections is controlled + // by the class BreakableLineCommentSection in case it is desirable to keep + // several line comment sections in the same unwrapped line. + // + // FIXME: Consider putting separate line comment sections as children to the + // unwrapped line instead. 
+ (*I)->ContinuesLineCommentSection = + continuesLineComment(**I, *Line, CommentPragmasRegex); + if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(*I); } @@ -2073,13 +2225,71 @@ const FormatToken *UnwrappedLineParser::getPreviousToken() { return Line->Tokens.back().Tok; } +void UnwrappedLineParser::distributeComments( + const SmallVectorImpl<FormatToken *> &Comments, + const FormatToken *NextTok) { + // Whether or not a line comment token continues a line is controlled by + // the method continuesLineComment, with the following caveat: + // + // Define a trail of Comments to be a nonempty proper postfix of Comments such + // that each comment line from the trail is aligned with the next token, if + // the next token exists. If a trail exists, the beginning of the maximal + // trail is marked as a start of a new comment section. + // + // For example in this code: + // + // int a; // line about a + // // line 1 about b + // // line 2 about b + // int b; + // + // the two lines about b form a maximal trail, so there are two sections, the + // first one consisting of the single comment "// line about a" and the + // second one consisting of the next two comments. + if (Comments.empty()) + return; + bool ShouldPushCommentsInCurrentLine = true; + bool HasTrailAlignedWithNextToken = false; + unsigned StartOfTrailAlignedWithNextToken = 0; + if (NextTok) { + // We are skipping the first element intentionally. 
+ for (unsigned i = Comments.size() - 1; i > 0; --i) { + if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { + HasTrailAlignedWithNextToken = true; + StartOfTrailAlignedWithNextToken = i; + } + } + } + for (unsigned i = 0, e = Comments.size(); i < e; ++i) { + FormatToken *FormatTok = Comments[i]; + if (HasTrailAlignedWithNextToken && + i == StartOfTrailAlignedWithNextToken) { + FormatTok->ContinuesLineCommentSection = false; + } else { + FormatTok->ContinuesLineCommentSection = + continuesLineComment(*FormatTok, *Line, CommentPragmasRegex); + } + if (!FormatTok->ContinuesLineCommentSection && + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { + ShouldPushCommentsInCurrentLine = false; + } + if (ShouldPushCommentsInCurrentLine) { + pushToken(FormatTok); + } else { + CommentsBeforeNextToken.push_back(FormatTok); + } + } +} + void UnwrappedLineParser::readToken() { - bool CommentsInCurrentLine = true; + SmallVector<FormatToken *, 1> Comments; do { FormatTok = Tokens->getNextToken(); assert(FormatTok); while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { + distributeComments(Comments, FormatTok); + Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. 
bool SwitchToPreprocessorLines = !Line->Tokens.empty(); @@ -2109,17 +2319,17 @@ void UnwrappedLineParser::readToken() { continue; } - if (!FormatTok->Tok.is(tok::comment)) + if (!FormatTok->Tok.is(tok::comment)) { + distributeComments(Comments, FormatTok); + Comments.clear(); return; - if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { - CommentsInCurrentLine = false; - } - if (CommentsInCurrentLine) { - pushToken(FormatTok); - } else { - CommentsBeforeNextToken.push_back(FormatTok); } + + Comments.push_back(FormatTok); } while (!eof()); + + distributeComments(Comments, nullptr); + Comments.clear(); } void UnwrappedLineParser::pushToken(FormatToken *Tok) { diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 9c78d33632c6f..15d1d9cda7a28 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -19,6 +19,7 @@ #include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/Support/Regex.h" #include <list> #include <stack> @@ -47,6 +48,14 @@ struct UnwrappedLine { bool InPPDirective; bool MustBeDeclaration; + + /// \brief If this \c UnwrappedLine closes a block in a sequence of lines, + /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding + /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be + /// \c kInvalidIndex. + size_t MatchingOpeningBlockLineIndex; + + static const size_t kInvalidIndex = -1; }; class UnwrappedLineConsumer { @@ -99,7 +108,10 @@ private: void parseAccessSpecifier(); bool parseEnum(); void parseJavaEnumBody(); - void parseRecord(); + // Parses a record (aka class) as a top level element. If ParseAsExpr is true, + // parses the record as a child block, i.e. if the class declaration is an + // expression. 
+ void parseRecord(bool ParseAsExpr = false); void parseObjCProtocolList(); void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); @@ -113,6 +125,21 @@ private: void nextToken(); const FormatToken *getPreviousToken(); void readToken(); + + // Decides which comment tokens should be added to the current line and which + // should be added as comments before the next token. + // + // Comments specifies the sequence of comment tokens to analyze. They get + // either pushed to the current line or added to the comments before the next + // token. + // + // NextTok specifies the next token. A null pointer NextTok is supported, and + // signifies either the absense of a next token, or that the next token + // shouldn't be taken into accunt for the analysis. + void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, + const FormatToken *NextTok); + + // Adds the comment preceding the next token to unwrapped lines. void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); void calculateBraceTypes(bool ExpectClassBody = false); @@ -162,6 +189,8 @@ private: const FormatStyle &Style; const AdditionalKeywords &Keywords; + llvm::Regex CommentPragmasRegex; + FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; @@ -213,8 +242,8 @@ struct UnwrappedLineNode { SmallVector<UnwrappedLine, 0> Children; }; -inline UnwrappedLine::UnwrappedLine() - : Level(0), InPPDirective(false), MustBeDeclaration(false) {} +inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false), + MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {} } // end namespace format } // end namespace clang diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index b64506f39035f..2c1f59324971f 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -25,64 +25,60 @@ operator()(const Change &C1, const Change &C2) const { C2.OriginalWhitespaceRange.getBegin()); } 
-WhitespaceManager::Change::Change( - bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, - bool IsStartOfDeclName, bool IsInsideToken) - : CreateReplacement(CreateReplacement), +WhitespaceManager::Change::Change(const FormatToken &Tok, + bool CreateReplacement, + SourceRange OriginalWhitespaceRange, + int Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, + StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, + bool ContinuesPPDirective, bool IsInsideToken) + : Tok(&Tok), CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), - ContinuesPPDirective(ContinuesPPDirective), - IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), - TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + CurrentLinePrefix(CurrentLinePrefix), + ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces), + IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0), + PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned IndentLevel, unsigned Spaces, + unsigned Spaces, unsigned StartOfTokenColumn, bool InPPDirective) { if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? 
FD_Break : FD_Continue; - Changes.push_back( - Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, - Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, + Spaces, StartOfTokenColumn, Newlines, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back(Change( - /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/false, + Tok.WhitespaceRange, /*Spaces=*/0, + Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, int Spaces) { + unsigned Newlines, int Spaces) { if (Tok.Finalized) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - Changes.push_back(Change( - true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), - IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, Tok.is(TT_LineComment) ? 
tok::comment : tok::unknown, - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/Newlines == 0)); + Changes.push_back( + Change(Tok, /*CreateReplacement=*/true, + SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces, + std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix, + InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -125,30 +121,64 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || - (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && - Changes[i - 1].Kind == tok::comment; + (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) || + (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) && + Changes[i - 1].Tok->is(tok::comment) && + // FIXME: This is a dirty hack. The problem is that + // BreakableLineCommentSection does comment reflow changes and here is + // the aligning of trailing comments. Consider the case where we reflow + // the second line up in this example: + // + // // line 1 + // // line 2 + // + // That amounts to 2 changes by BreakableLineCommentSection: + // - the first, delimited by (), for the whitespace between the tokens, + // - and second, delimited by [], for the whitespace at the beginning + // of the second token: + // + // // line 1( + // )[// ]line 2 + // + // So in the end we have two changes like this: + // + // // line1()[ ]line 2 + // + // Note that the OriginalWhitespaceStart of the second change is the + // same as the PreviousOriginalWhitespaceEnd of the first change. 
+ // In this case, the below check ensures that the second change doesn't + // get treated as a trailing comment change here, since this might + // trigger additional whitespace to be wrongly inserted before "line 2" + // by the comment aligner here. + // + // For a proper solution we need a mechanism to say to WhitespaceManager + // that a particular change breaks the current sequence of trailing + // comments. + OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd; } // FIXME: The last token is currently not always an eof token; in those // cases, setting TokenLength of the last token to 0 is wrong. Changes.back().TokenLength = 0; - Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; + Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment); const WhitespaceManager::Change *LastBlockComment = nullptr; for (auto &Change : Changes) { // Reset the IsTrailingComment flag for changes inside of trailing comments - // so they don't get realigned later. - if (Change.IsInsideToken) + // so they don't get realigned later. Comment line breaks however still need + // to be aligned. 
+ if (Change.IsInsideToken && Change.NewlinesBefore == 0) Change.IsTrailingComment = false; Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; - if (Change.Kind == tok::comment) { - LastBlockComment = &Change; - } else if (Change.Kind == tok::unknown) { - if ((Change.StartOfBlockComment = LastBlockComment)) - Change.IndentationOffset = - Change.StartOfTokenColumn - - Change.StartOfBlockComment->StartOfTokenColumn; + if (Change.Tok->is(tok::comment)) { + if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) + LastBlockComment = &Change; + else { + if ((Change.StartOfBlockComment = LastBlockComment)) + Change.IndentationOffset = + Change.StartOfTokenColumn - + Change.StartOfBlockComment->StartOfTokenColumn; + } } else { LastBlockComment = nullptr; } @@ -162,21 +192,56 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, SmallVector<WhitespaceManager::Change, 16> &Changes) { bool FoundMatchOnLine = false; int Shift = 0; + + // ScopeStack keeps track of the current scope depth. It contains indices of + // the first token on each scope. + // We only run the "Matches" function on tokens from the outer-most scope. + // However, we do need to pay special attention to one class of tokens + // that are not in the outer-most scope, and that is function parameters + // which are split across multiple lines, as illustrated by this example: + // double a(int x); + // int b(int y, + // double z); + // In the above example, we need to take special care to ensure that + // 'double z' is indented along with it's owning function 'b'. 
+ SmallVector<unsigned, 16> ScopeStack; + for (unsigned i = Start; i != End; ++i) { - if (Changes[i].NewlinesBefore > 0) { - FoundMatchOnLine = false; + if (ScopeStack.size() != 0 && + Changes[i].nestingAndIndentLevel() < + Changes[ScopeStack.back()].nestingAndIndentLevel()) + ScopeStack.pop_back(); + + if (i != Start && Changes[i].nestingAndIndentLevel() > + Changes[i - 1].nestingAndIndentLevel()) + ScopeStack.push_back(i); + + bool InsideNestedScope = ScopeStack.size() != 0; + + if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { Shift = 0; + FoundMatchOnLine = false; } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are // shifted by the same amount - if (!FoundMatchOnLine && Matches(Changes[i])) { + if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { FoundMatchOnLine = true; Shift = Column - Changes[i].StartOfTokenColumn; Changes[i].Spaces += Shift; } + // This is for function parameters that are split across multiple lines, + // as mentioned in the ScopeStack comment. + if (InsideNestedScope && Changes[i].NewlinesBefore > 0) { + unsigned ScopeStart = ScopeStack.back(); + if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || + (ScopeStart > Start + 1 && + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) + Changes[i].Spaces += Shift; + } + assert(Shift >= 0); Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) @@ -184,15 +249,37 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, } } -// Walk through all of the changes and find sequences of matching tokens to -// align. To do so, keep track of the lines and whether or not a matching token -// was found on a line. If a matching token is found, extend the current -// sequence. If the current line cannot be part of a sequence, e.g. 
because -// there is an empty line before it or it contains only non-matching tokens, -// finalize the previous sequence. +// Walk through a subset of the changes, starting at StartAt, and find +// sequences of matching tokens to align. To do so, keep track of the lines and +// whether or not a matching token was found on a line. If a matching token is +// found, extend the current sequence. If the current line cannot be part of a +// sequence, e.g. because there is an empty line before it or it contains only +// non-matching tokens, finalize the previous sequence. +// The value returned is the token on which we stopped, either because we +// exhausted all items inside Changes, or because we hit a scope level higher +// than our initial scope. +// This function is recursive. Each invocation processes only the scope level +// equal to the initial level, which is the level of Changes[StartAt]. +// If we encounter a scope level greater than the initial level, then we call +// ourselves recursively, thereby avoiding the pollution of the current state +// with the alignment requirements of the nested sub-level. This recursive +// behavior is necessary for aligning function prototypes that have one or more +// arguments. +// If this function encounters a scope level less than the initial level, +// it returns the current position. +// There is a non-obvious subtlety in the recursive behavior: Even though we +// defer processing of nested levels to recursive invocations of this +// function, when it comes time to align a sequence of tokens, we run the +// alignment on the entire sequence, including the nested levels. +// When doing so, most of the nested tokens are skipped, because their +// alignment was already handled by the recursive invocations of this function. +// However, the special exception is that we do NOT skip function parameters +// that are split across multiple lines. 
See the test case in FormatTest.cpp +// that mentions "split function parameter alignment" for an example of this. template <typename F> -static void AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes) { +static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes, + unsigned StartAt) { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -200,14 +287,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - // Keep track of the nesting level of matching tokens, i.e. the number of - // surrounding (), [], or {}. We will only align a sequence of matching - // token that share the same scope depth. - // - // FIXME: This could use FormatToken::NestingLevel information, but there is - // an outstanding issue wrt the brace scopes. - unsigned NestingLevelOfLastMatch = 0; - unsigned NestingLevel = 0; + // Measure the scope level (i.e. depth of (), [], {}) of the first token, and + // abort when we hit any token in a higher scope than the starting one. + auto NestingAndIndentLevel = StartAt < Changes.size() + ? 
Changes[StartAt].nestingAndIndentLevel() + : std::pair<unsigned, unsigned>(0, 0); // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -235,7 +319,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, EndOfSequence = 0; }; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned i = StartAt; + for (unsigned e = Changes.size(); i != e; ++i) { + if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel) + break; + if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; @@ -247,33 +335,24 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, FoundMatchOnLine = false; } - if (Changes[i].Kind == tok::comma) { + if (Changes[i].Tok->is(tok::comma)) { ++CommasBeforeMatch; - } else if (Changes[i].Kind == tok::r_brace || - Changes[i].Kind == tok::r_paren || - Changes[i].Kind == tok::r_square) { - --NestingLevel; - } else if (Changes[i].Kind == tok::l_brace || - Changes[i].Kind == tok::l_paren || - Changes[i].Kind == tok::l_square) { - // We want sequences to skip over child scopes if possible, but not the - // other way around. - NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); - ++NestingLevel; + } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) { + // Call AlignTokens recursively, skipping over this scope block. + unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); + i = StoppedAt - 1; + continue; } if (!Matches(Changes[i])) continue; // If there is more than one matching token per line, or if the number of - // preceding commas, or the scope depth, do not match anymore, end the - // sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || - NestingLevel != NestingLevelOfLastMatch) + // preceding commas, do not match anymore, end the sequence. 
+ if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) AlignCurrentSequence(); CommasBeforeLastMatch = CommasBeforeMatch; - NestingLevelOfLastMatch = NestingLevel; FoundMatchOnLine = true; if (StartOfSequence == 0) @@ -296,8 +375,9 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } - EndOfSequence = Changes.size(); + EndOfSequence = i; AlignCurrentSequence(); + return i; } void WhitespaceManager::alignConsecutiveAssignments() { @@ -314,9 +394,9 @@ void WhitespaceManager::alignConsecutiveAssignments() { if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) return false; - return C.Kind == tok::equal; + return C.Tok->is(tok::equal); }, - Changes); + Changes, /*StartAt=*/0); } void WhitespaceManager::alignConsecutiveDeclarations() { @@ -329,9 +409,15 @@ void WhitespaceManager::alignConsecutiveDeclarations() { // const char* const* v1; // float const* v2; // SomeVeryLongType const& v3; - - AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, - Changes); + AlignTokens(Style, + [](Change const &C) { + // tok::kw_operator is necessary for aligning operator overload + // definitions. + return C.Tok->is(TT_StartOfName) || + C.Tok->is(TT_FunctionDeclarationName) || + C.Tok->is(tok::kw_operator); + }, + Changes, /*StartAt=*/0); } void WhitespaceManager::alignTrailingComments() { @@ -360,17 +446,14 @@ void WhitespaceManager::alignTrailingComments() { // If this comment follows an } in column 0, it probably documents the // closing of a namespace and we don't want to align it. bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && - Changes[i - 1].Kind == tok::r_brace && + Changes[i - 1].Tok->is(tok::r_brace) && Changes[i - 1].StartOfTokenColumn == 0; bool WasAlignedWithStartOfNextLine = false; if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. 
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( Changes[i].OriginalWhitespaceRange.getEnd()); for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Kind == tok::comment || - Changes[j].Kind == tok::unknown) - // Skip over comments and unknown tokens. "unknown tokens are used for - // the continuation of multiline comments. + if (Changes[j].Tok->is(tok::comment)) continue; unsigned NextColumn = SourceMgr.getSpellingColumnNumber( @@ -481,7 +564,8 @@ void WhitespaceManager::generateChanges() { C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces), + appendIndentText(ReplacementText, C.Tok->IndentLevel, + std::max(0, C.Spaces), C.StartOfTokenColumn - std::max(0, C.Spaces)); ReplacementText.append(C.CurrentLinePrefix); storeReplacement(C.OriginalWhitespaceRange, ReplacementText); diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index f42e371830b3d..6be4af2622766 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -43,8 +43,7 @@ public: /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. - void replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned IndentLevel, unsigned Spaces, + void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool InPPDirective = false); @@ -72,8 +71,7 @@ public: unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, - int Spaces); + unsigned Newlines, int Spaces); /// \brief Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); @@ -91,8 +89,6 @@ public: const SourceManager &SourceMgr; }; - Change() {} - /// \brief Creates a \c Change. 
/// /// The generated \c Change will replace the characters at @@ -102,12 +98,17 @@ public: /// /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out /// trailing comments and escaped newlines. - Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, - bool ContinuesPPDirective, bool IsStartOfDeclName, - bool IsInsideToken); + Change(const FormatToken &Tok, bool CreateReplacement, + SourceRange OriginalWhitespaceRange, int Spaces, + unsigned StartOfTokenColumn, unsigned NewlinesBefore, + StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, + bool ContinuesPPDirective, bool IsInsideToken); + + // The kind of the token whose whitespace this change replaces, or in which + // this change inserts whitespace. + // FIXME: Currently this is not set correctly for breaks inside comments, as + // the \c BreakableToken is still doing its own alignment. + const FormatToken *Tok; bool CreateReplacement; // Changes might be in the middle of a token, so we cannot just keep the @@ -117,18 +118,7 @@ public: unsigned NewlinesBefore; std::string PreviousLinePostfix; std::string CurrentLinePrefix; - // The kind of the token whose whitespace this change replaces, or in which - // this change inserts whitespace. - // FIXME: Currently this is not set correctly for breaks inside comments, as - // the \c BreakableToken is still doing its own alignment. - tok::TokenKind Kind; bool ContinuesPPDirective; - bool IsStartOfDeclName; - - // The number of nested blocks the token is in. This is used to add tabs - // only for the indentation, and not for alignment, when - // UseTab = US_ForIndentation. - unsigned IndentLevel; // The number of spaces in front of the token or broken part of the token. // This will be adapted when aligning tokens. 
@@ -159,6 +149,14 @@ public: // the alignment process. const Change *StartOfBlockComment; int IndentationOffset; + + // A combination of nesting level and indent level, which are used in + // tandem to compute lexical scope, for the purposes of deciding + // when to stop consecutive alignment runs. + std::pair<unsigned, unsigned> + nestingAndIndentLevel() const { + return std::make_pair(Tok->NestingLevel, Tok->IndentLevel); + } }; private: |