summaryrefslogtreecommitdiff
path: root/lib/Format
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format')
-rw-r--r--lib/Format/BreakableToken.cpp736
-rw-r--r--lib/Format/BreakableToken.h362
-rw-r--r--lib/Format/CMakeLists.txt2
-rw-r--r--lib/Format/Comments.cpp36
-rw-r--r--lib/Format/Comments.h33
-rw-r--r--lib/Format/ContinuationIndenter.cpp262
-rw-r--r--lib/Format/ContinuationIndenter.h19
-rw-r--r--lib/Format/Format.cpp147
-rw-r--r--lib/Format/FormatToken.h22
-rw-r--r--lib/Format/FormatTokenLexer.cpp38
-rw-r--r--lib/Format/FormatTokenLexer.h1
-rw-r--r--lib/Format/NamespaceEndCommentsFixer.cpp175
-rw-r--r--lib/Format/NamespaceEndCommentsFixer.h37
-rw-r--r--lib/Format/TokenAnnotator.cpp223
-rw-r--r--lib/Format/TokenAnnotator.h4
-rw-r--r--lib/Format/UnwrappedLineFormatter.cpp46
-rw-r--r--lib/Format/UnwrappedLineFormatter.h8
-rw-r--r--lib/Format/UnwrappedLineParser.cpp286
-rw-r--r--lib/Format/UnwrappedLineParser.h35
-rw-r--r--lib/Format/WhitespaceManager.cpp274
-rw-r--r--lib/Format/WhitespaceManager.h44
21 files changed, 2070 insertions, 720 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 6363f895f95b9..c97486e4e4a79 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
#include "BreakableToken.h"
-#include "Comments.h"
+#include "ContinuationIndenter.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
@@ -40,6 +40,21 @@ static bool IsBlank(char C) {
}
}
+static StringRef getLineCommentIndentPrefix(StringRef Comment) {
+ static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+ StringRef LongestPrefix;
+ for (StringRef KnownPrefix : KnownPrefixes) {
+ if (Comment.startswith(KnownPrefix)) {
+ size_t PrefixLength = KnownPrefix.size();
+ while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
+ ++PrefixLength;
+ if (PrefixLength > LongestPrefix.size())
+ LongestPrefix = Comment.substr(0, PrefixLength);
+ }
+ }
+ return LongestPrefix;
+}
+
static BreakableToken::Split getCommentSplit(StringRef Text,
unsigned ContentStartColumn,
unsigned ColumnLimit,
@@ -132,37 +147,61 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
return BreakableToken::Split(StringRef::npos, 0);
}
+bool switchesFormatting(const FormatToken &Token) {
+ assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
+ "formatting regions are switched by comment tokens");
+ StringRef Content = Token.TokenText.substr(2).ltrim();
+ return Content.startswith("clang-format on") ||
+ Content.startswith("clang-format off");
+}
+
+unsigned
+BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const {
+ // Example: consider the content
+ // lala lala
+ // - RemainingTokenColumns is the original number of columns, 10;
+ // - Split is (4, 2), denoting the two spaces between the two words;
+ //
+ // We compute the number of columns when the split is compressed into a single
+ // space, like:
+ // lala lala
+ return RemainingTokenColumns + 1 - Split.second;
+}
+
unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
return StartColumn + Prefix.size() + Postfix.size() +
- encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+ encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
StartColumn + Prefix.size(),
Style.TabWidth, Encoding);
}
BreakableSingleLineToken::BreakableSingleLineToken(
- const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style),
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Tok, InPPDirective, Encoding, Style),
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
- assert(Tok.TokenText.endswith(Postfix));
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
Line = Tok.TokenText.substr(
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
BreakableStringLiteral::BreakableStringLiteral(
- const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix,
- InPPDirective, Encoding, Style) {}
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
+ Encoding, Style) {}
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
return getStringSplit(Line.substr(TailOffset),
StartColumn + Prefix.size() + Postfix.size(),
ColumnLimit, Style.TabWidth, Encoding);
@@ -171,86 +210,149 @@ BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) {
- unsigned LeadingSpaces = StartColumn;
- // The '@' of an ObjC string literal (@"Test") does not become part of the
- // string token.
- // FIXME: It might be a cleaner solution to merge the tokens as a
- // precomputation step.
- if (Prefix.startswith("@"))
- --LeadingSpaces;
Whitespaces.replaceWhitespaceInToken(
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
- Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces);
-}
-
-BreakableLineComment::BreakableLineComment(
- const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
- bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
- getLineCommentIndentPrefix(Token.TokenText), "",
- InPPDirective, Encoding, Style) {
- OriginalPrefix = Prefix;
- if (Token.TokenText.size() > Prefix.size() &&
- isAlphanumeric(Token.TokenText[Prefix.size()])) {
- if (Prefix == "//")
- Prefix = "// ";
- else if (Prefix == "///")
- Prefix = "/// ";
- else if (Prefix == "//!")
- Prefix = "//! ";
- }
+ Prefix, InPPDirective, 1, StartColumn);
}
+BreakableComment::BreakableComment(const FormatToken &Token,
+ unsigned StartColumn,
+ bool InPPDirective,
+ encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Token, InPPDirective, Encoding, Style),
+ StartColumn(StartColumn) {}
+
+unsigned BreakableComment::getLineCount() const { return Lines.size(); }
+
BreakableToken::Split
-BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Don't break lines matching the comment pragmas regex.
+ if (CommentPragmasRegex.match(Content[LineIndex]))
+ return Split(StringRef::npos, 0);
+ return getCommentSplit(Content[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
ColumnLimit, Style.TabWidth, Encoding);
}
-void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
- Split Split,
- WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Text is relative to the content line, but Whitespaces operates relative to
+ // the start of the corresponding token, so compute the start of the Split
+ // that needs to be compressed into a single space relative to the start of
+ // its token.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
Whitespaces.replaceWhitespaceInToken(
- Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
- Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
}
-void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
- Whitespaces.replaceWhitespaceInToken(
- Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
- "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
- /*Spaces=*/1);
-}
-
-void BreakableLineComment::replaceWhitespaceBefore(
- unsigned LineIndex, WhitespaceManager &Whitespaces) {
- if (OriginalPrefix != Prefix) {
- Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
- /*InPPDirective=*/false,
- /*Newlines=*/0, /*IndentLevel=*/0,
- /*Spaces=*/1);
+BreakableToken::Split
+BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit) const {
+ unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
+ StringRef TrimmedText = Text.rtrim(Blanks);
+ // This is the width of the resulting line in case the full line of Text gets
+ // reflown up starting at ReflowStartColumn.
+ unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
+ TrimmedText, ReflowStartColumn,
+ Style.TabWidth, Encoding);
+ // If the full line fits up, we return a reflow split after it,
+ // otherwise we compute the largest piece of text that fits after
+ // ReflowStartColumn.
+ Split ReflowSplit =
+ FullWidth <= ColumnLimit
+ ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
+ : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
+ Style.TabWidth, Encoding);
+
+ // We need to be extra careful here, because while it's OK to keep a long line
+ // if it can't be broken into smaller pieces (like when the first word of a
+ // long line is longer than the column limit), it's not OK to reflow that long
+ // word up. So we recompute the size of the previous line after reflowing and
+ // only return the reflow split if that's under the line limit.
+ if (ReflowSplit.first != StringRef::npos &&
+ // Check if the width of the newly reflown line is under the limit.
+ PreviousEndColumn + ReflowPrefix.size() +
+ encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
+ PreviousEndColumn +
+ ReflowPrefix.size(),
+ Style.TabWidth, Encoding) <=
+ ColumnLimit) {
+ return ReflowSplit;
}
+ return Split(StringRef::npos, 0);
+}
+
+const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
+ return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
+}
+
+static bool mayReflowContent(StringRef Content) {
+ Content = Content.trim(Blanks);
+ // Lines starting with '@' commonly have special meaning.
+ static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
+ "@", "TODO", "FIXME", "XXX"};
+ bool hasSpecialMeaningPrefix = false;
+ for (StringRef Prefix : kSpecialMeaningPrefixes) {
+ if (Content.startswith(Prefix)) {
+ hasSpecialMeaningPrefix = true;
+ break;
+ }
+ }
+ // Simple heuristic for what to reflow: content should contain at least two
+ // characters and either the first or second character must be
+ // non-punctuation.
+ return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
+ !Content.endswith("\\") &&
+ // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
+ // true, then the first code point must be 1 byte long.
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
}
BreakableBlockComment::BreakableBlockComment(
- const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+ const FormatToken &Token, unsigned StartColumn,
unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style)
- : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
- StringRef TokenText(Token.TokenText);
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_BlockComment) &&
+ "block comment section must start with a block comment");
+
+ StringRef TokenText(Tok.TokenText);
assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
int IndentDelta = StartColumn - OriginalStartColumn;
- LeadingWhitespace.resize(Lines.size());
- StartOfLineColumn.resize(Lines.size());
- StartOfLineColumn[0] = StartColumn + 2;
+ Content.resize(Lines.size());
+ Content[0] = Lines[0];
+ ContentColumn.resize(Lines.size());
+ // Account for the initial '/*'.
+ ContentColumn[0] = StartColumn + 2;
+ Tokens.resize(Lines.size());
for (size_t i = 1; i < Lines.size(); ++i)
adjustWhitespace(i, IndentDelta);
+ // Align decorations with the column of the star on the first line,
+ // that is one column after the start "/*".
+ DecorationColumn = StartColumn + 1;
+
+ // Account for comment decoration patterns like this:
+ //
+ // /*
+ // ** blah blah blah
+ // */
+ if (Lines.size() >= 2 && Content[1].startswith("**") &&
+ static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
+ DecorationColumn = StartColumn;
+ }
+
Decoration = "* ";
if (Lines.size() == 1 && !FirstInLine) {
// Comments for which FirstInLine is false can start on arbitrary column,
@@ -262,49 +364,60 @@ BreakableBlockComment::BreakableBlockComment(
}
for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
// If the last line is empty, the closing "*/" will have a star.
- if (i + 1 == e && Lines[i].empty())
+ if (i + 1 == e && Content[i].empty())
break;
- if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+ if (!Content[i].empty() && i + 1 != e &&
+ Decoration.startswith(Content[i]))
continue;
- while (!Lines[i].startswith(Decoration))
+ while (!Content[i].startswith(Decoration))
Decoration = Decoration.substr(0, Decoration.size() - 1);
}
LastLineNeedsDecoration = true;
- IndentAtLineBreak = StartOfLineColumn[0] + 1;
- for (size_t i = 1; i < Lines.size(); ++i) {
- if (Lines[i].empty()) {
- if (i + 1 == Lines.size()) {
+ IndentAtLineBreak = ContentColumn[0] + 1;
+ for (size_t i = 1, e = Lines.size(); i < e; ++i) {
+ if (Content[i].empty()) {
+ if (i + 1 == e) {
// Empty last line means that we already have a star as a part of the
// trailing */. We also need to preserve whitespace, so that */ is
// correctly indented.
LastLineNeedsDecoration = false;
+ // Align the star in the last '*/' with the stars on the previous lines.
+ if (e >= 2 && !Decoration.empty()) {
+ ContentColumn[i] = DecorationColumn;
+ }
} else if (Decoration.empty()) {
// For all other lines, set the start column to 0 if they're empty, so
// we do not insert trailing whitespace anywhere.
- StartOfLineColumn[i] = 0;
+ ContentColumn[i] = 0;
}
continue;
}
// The first line already excludes the star.
+ // The last line excludes the star if LastLineNeedsDecoration is false.
// For all other lines, adjust the line to exclude the star and
// (optionally) the first whitespace.
- unsigned DecorationSize =
- Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
- StartOfLineColumn[i] += DecorationSize;
- Lines[i] = Lines[i].substr(DecorationSize);
- LeadingWhitespace[i] += DecorationSize;
- if (!Decoration.startswith(Lines[i]))
+ unsigned DecorationSize = Decoration.startswith(Content[i])
+ ? Content[i].size()
+ : Decoration.size();
+ if (DecorationSize) {
+ ContentColumn[i] = DecorationColumn + DecorationSize;
+ }
+ Content[i] = Content[i].substr(DecorationSize);
+ if (!Decoration.startswith(Content[i]))
IndentAtLineBreak =
- std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+ std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
}
- IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+ IndentAtLineBreak =
+ std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
DEBUG({
llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
for (size_t i = 0; i < Lines.size(); ++i) {
- llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
- << "\n";
+ llvm::dbgs() << i << " |" << Content[i] << "| "
+ << "CC=" << ContentColumn[i] << "| "
+ << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
}
});
}
@@ -334,78 +447,162 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
// Adjust Lines to only contain relevant text.
- Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
- Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
- // Adjust LeadingWhitespace to account all whitespace between the lines
- // to the current line.
- LeadingWhitespace[LineIndex] =
- Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+ size_t PreviousContentOffset =
+ Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
+ Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
+ PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
+ Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
// Adjust the start column uniformly across all lines.
- StartOfLineColumn[LineIndex] =
+ ContentColumn[LineIndex] =
encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
IndentDelta;
}
-unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
-
unsigned BreakableBlockComment::getLineLengthAfterSplit(
- unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
- return ContentStartColumn +
- encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
- ContentStartColumn, Style.TabWidth,
- Encoding) +
- // The last line gets a "*/" postfix.
- (LineIndex + 1 == Lines.size() ? 2 : 0);
-}
-
-BreakableToken::Split
-BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- return getCommentSplit(Lines[LineIndex].substr(TailOffset),
- getContentStartColumn(LineIndex, TailOffset),
- ColumnLimit, Style.TabWidth, Encoding);
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+ unsigned LineLength =
+ ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+ // The last line gets a "*/" postfix.
+ if (LineIndex + 1 == Lines.size()) {
+ LineLength += 2;
+ // We never need a decoration when breaking just the trailing "*/" postfix.
+ // Note that checking that Length == 0 is not enough, since Length could
+ // also be StringRef::npos.
+ if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+ LineLength -= Decoration.size();
+ }
+ }
+ return LineLength;
}
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
Split Split,
WhitespaceManager &Whitespaces) {
- StringRef Text = Lines[LineIndex].substr(TailOffset);
+ StringRef Text = Content[LineIndex].substr(TailOffset);
StringRef Prefix = Decoration;
+ // We need this to account for the case when we have a decoration "* " for all
+ // the lines except for the last one, where the star in "*/" acts as a
+ // decoration.
+ unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
if (LineIndex + 1 == Lines.size() &&
Text.size() == Split.first + Split.second) {
// For the last line we need to break before "*/", but not to add "* ".
Prefix = "";
+ if (LocalIndentAtLineBreak >= 2)
+ LocalIndentAtLineBreak -= 2;
}
-
+ // The split offset is from the beginning of the line. Convert it to an offset
+ // from the beginning of the token text.
unsigned BreakOffsetInToken =
- Text.data() - Tok.TokenText.data() + Split.first;
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
unsigned CharsToRemove = Split.second;
- assert(IndentAtLineBreak >= Decoration.size());
+ assert(LocalIndentAtLineBreak >= Prefix.size());
Whitespaces.replaceWhitespaceInToken(
- Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
- IndentLevel, IndentAtLineBreak - Decoration.size());
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
+ InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
}
-void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
- unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) {
- StringRef Text = Lines[LineIndex].substr(TailOffset);
- unsigned BreakOffsetInToken =
- Text.data() - Tok.TokenText.data() + Split.first;
- unsigned CharsToRemove = Split.second;
- Whitespaces.replaceWhitespaceInToken(
- Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
- /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
+BreakableToken::Split BreakableBlockComment::getSplitBefore(
+ unsigned LineIndex,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableBlockComment::getReflownColumn(
+ StringRef Content,
+ unsigned LineIndex,
+ unsigned PreviousEndColumn) const {
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ // If this is the last line, it will carry around its '*/' postfix.
+ unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
+ // The line is composed of previous text, reflow prefix, reflown text and
+ // postfix.
+ unsigned ReflownColumn =
+ StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
+ Style.TabWidth, Encoding) +
+ PostfixLength;
+ return ReflownColumn;
}
+unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ // Block comment line contents contain the trailing whitespace after the
+ // decoration, so the need of left trim. Note that this behavior is
+ // consistent with the breaking of block comments where the indentation of
+ // a broken line is uniform across all the lines of the block comment.
+ SplitBefore.first + SplitBefore.second <
+ Content[LineIndex].ltrim().size()) {
+ // A piece of line, not the whole, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown, need to check if we need to insert a break
+ // for the postfix or not.
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn <= ColumnLimit) {
+ return ReflownColumn;
+ }
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ }
+}
void BreakableBlockComment::replaceWhitespaceBefore(
- unsigned LineIndex, WhitespaceManager &Whitespaces) {
- if (LineIndex == 0)
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0) return;
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ if (SplitBefore.first != StringRef::npos) {
+ // Here we need to reflow.
+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+ "Reflowing whitespace within a token");
+ // This is the offset of the end of the last line relative to the start of
+ // the token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = TrimmedContent.data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), WhitespaceOffsetInToken,
+ /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+ /*Spaces=*/0);
+ // Check if we need to also insert a break at the whitespace range.
+ // For this we first adapt the reflow split relative to the beginning of the
+ // content.
+ // Note that we don't need a penalty for this break, since it doesn't change
+ // the total number of lines.
+ Split BreakSplit = SplitBefore;
+ BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn > ColumnLimit) {
+ insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+ }
return;
+ }
+
+ // Here no reflow with the previous line will happen.
+ // Fix the decoration of the line at LineIndex.
StringRef Prefix = Decoration;
- if (Lines[LineIndex].empty()) {
+ if (Content[LineIndex].empty()) {
if (LineIndex + 1 == Lines.size()) {
if (!LastLineNeedsDecoration) {
// If the last line was empty, we don't need a prefix, as the */ will
@@ -418,19 +615,35 @@ void BreakableBlockComment::replaceWhitespaceBefore(
Prefix = Prefix.substr(0, 1);
}
} else {
- if (StartOfLineColumn[LineIndex] == 1) {
+ if (ContentColumn[LineIndex] == 1) {
// This line starts immediately after the decorating *.
Prefix = Prefix.substr(0, 1);
}
}
-
- unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
- Tok.TokenText.data() -
- LeadingWhitespace[LineIndex];
+ // This is the offset of the end of the last line relative to the start of the
+ // token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = Content[LineIndex].data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
Whitespaces.replaceWhitespaceInToken(
- Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
- InPPDirective, 1, IndentLevel,
- StartOfLineColumn[LineIndex] - Prefix.size());
+ tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
+ InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
+}
+
+bool BreakableBlockComment::mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Content[LineIndex] may exclude the indent after the '*' decoration. In that
+ // case, we compute the start of the comment pragma manually.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
+ IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex));
}
unsigned
@@ -439,7 +652,248 @@ BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
// If we break, we always break at the predefined indent.
if (TailOffset != 0)
return IndentAtLineBreak;
- return std::max(0, StartOfLineColumn[LineIndex]);
+ return std::max(0, ContentColumn[LineIndex]);
+}
+
+BreakableLineCommentSection::BreakableLineCommentSection(
+ const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_LineComment) &&
+ "line comment section must start with a line comment");
+ FormatToken *LineTok = nullptr;
+ for (const FormatToken *CurrentTok = &Tok;
+ CurrentTok && CurrentTok->is(TT_LineComment);
+ CurrentTok = CurrentTok->Next) {
+ LastLineTok = LineTok;
+ StringRef TokenText(CurrentTok->TokenText);
+ assert(TokenText.startswith("//"));
+ size_t FirstLineIndex = Lines.size();
+ TokenText.split(Lines, "\n");
+ Content.resize(Lines.size());
+ ContentColumn.resize(Lines.size());
+ OriginalContentColumn.resize(Lines.size());
+ Tokens.resize(Lines.size());
+ Prefix.resize(Lines.size());
+ OriginalPrefix.resize(Lines.size());
+ for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+ // We need to trim the blanks in case this is not the first line in a
+ // multiline comment. Then the indent is included in Lines[i].
+ StringRef IndentPrefix =
+ getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
+ assert(IndentPrefix.startswith("//"));
+ OriginalPrefix[i] = Prefix[i] = IndentPrefix;
+ if (Lines[i].size() > Prefix[i].size() &&
+ isAlphanumeric(Lines[i][Prefix[i].size()])) {
+ if (Prefix[i] == "//")
+ Prefix[i] = "// ";
+ else if (Prefix[i] == "///")
+ Prefix[i] = "/// ";
+ else if (Prefix[i] == "//!")
+ Prefix[i] = "//! ";
+ }
+
+ Tokens[i] = LineTok;
+ Content[i] = Lines[i].substr(IndentPrefix.size());
+ OriginalContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(OriginalPrefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ ContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(Prefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+
+ // Calculate the end of the non-whitespace text in this line.
+ size_t EndOfLine = Content[i].find_last_not_of(Blanks);
+ if (EndOfLine == StringRef::npos)
+ EndOfLine = Content[i].size();
+ else
+ ++EndOfLine;
+ Content[i] = Content[i].substr(0, EndOfLine);
+ }
+ LineTok = CurrentTok->Next;
+ if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
+ // A line comment section needs to broken by a line comment that is
+ // preceded by at least two newlines. Note that we put this break here
+ // instead of breaking at a previous stage during parsing, since that
+ // would split the contents of the enum into two unwrapped lines in this
+ // example, which is undesirable:
+ // enum A {
+ // a, // comment about a
+ //
+ // // comment about b
+ // b
+ // };
+ //
+ // FIXME: Consider putting separate line comment sections as children to
+ // the unwrapped line instead.
+ break;
+ }
+ }
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn =
+ (TailOffset == 0 ? ContentColumn[LineIndex]
+ : OriginalContentColumn[LineIndex]);
+ return ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+}
+
+void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Compute the offset of the split relative to the beginning of the token
+ // text.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ // Compute the size of the new indent, including the size of the new prefix of
+ // the newly broken line.
+ unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
+ Prefix[LineIndex].size() -
+ OriginalPrefix[LineIndex].size();
+ assert(IndentAtLineBreak >= Prefix[LineIndex].size());
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
+ Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
+}
+
+BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ // A piece of line, not the whole line, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown.
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ }
+}
+
+void BreakableLineCommentSection::replaceWhitespaceBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ // If this is the first line of a token, we need to inform Whitespace Manager
+ // about it: either adapt the whitespace range preceding it, or mark it as an
+ // untouchable token.
+ // This happens for instance here:
+ // // line 1 \
+ // // line 2
+ if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
+ if (SplitBefore.first != StringRef::npos) {
+ // Reflow happens between tokens. Replace the whitespace between the
+ // tokens by the empty string.
+ Whitespaces.replaceWhitespace(
+ *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+ // Replace the indent and prefix of the token with the reflow prefix.
+ unsigned WhitespaceLength =
+ Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+ Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+ /*Offset=*/0,
+ /*ReplaceChars=*/WhitespaceLength,
+ /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix,
+ /*InPPDirective=*/false,
+ /*Newlines=*/0,
+ /*Spaces=*/0);
+ } else {
+ // This is the first line for the current token, but no reflow with the
+ // previous token is necessary. However, we still may need to adjust the
+ // start column. Note that ContentColumn[LineIndex] is the expected
+ // content column after a possible update to the prefix, hence the prefix
+ // length change is included.
+ unsigned LineColumn =
+ ContentColumn[LineIndex] -
+ (Content[LineIndex].data() - Lines[LineIndex].data()) +
+ (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+
+ // We always want to create a replacement instead of adding an untouchable
+ // token, even if LineColumn is the same as the original column of the
+ // token. This is because WhitespaceManager doesn't align trailing
+ // comments if they are untouchable.
+ Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+ /*Newlines=*/1,
+ /*Spaces=*/LineColumn,
+ /*StartOfTokenColumn=*/LineColumn,
+ /*InPPDirective=*/false);
+ }
+ }
+ if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
+ // Adjust the prefix if necessary.
+
+ // Take care of the space possibly introduced after a decoration.
+ assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
+ "Expecting a line comment prefix to differ from original by at most "
+ "a space");
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
+ }
+ // Add a break after a reflow split has been introduced, if necessary.
+ // Note that this break doesn't need to be penalized, since it doesn't change
+ // the number of lines.
+ if (SplitBefore.first != StringRef::npos &&
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
+ }
+}
+
+void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+ if (LastLineTok) {
+ State.NextToken = LastLineTok->Next;
+ }
+}
+
+bool BreakableLineCommentSection::mayReflow(
+ unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
+ // Line comments have the indent as part of the prefix, so we need to
+ // recompute the start of the line.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].startswith("//")) {
+ IndentContent = Lines[LineIndex].substr(2);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex)) &&
+ OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
+}
+
+unsigned
+BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ if (TailOffset != 0) {
+ return OriginalContentColumn[LineIndex];
+ }
+ return ContentColumn[LineIndex];
}
} // namespace format
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index eb1f9fda30711..e642a538e21c3 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -8,9 +8,10 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// \brief Declares BreakableToken, BreakableStringLiteral, and
-/// BreakableBlockComment classes, that contain token type-specific logic to
-/// break long lines in tokens.
+/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
+/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
+/// token type-specific logic to break long lines in tokens and reflow content
+/// between tokens.
///
//===----------------------------------------------------------------------===//
@@ -20,15 +21,49 @@
#include "Encoding.h"
#include "TokenAnnotator.h"
#include "WhitespaceManager.h"
+#include "llvm/Support/Regex.h"
#include <utility>
namespace clang {
namespace format {
+/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
+/// \p Token must be a comment.
+bool switchesFormatting(const FormatToken &Token);
+
struct FormatStyle;
/// \brief Base class for strategies on how to break tokens.
///
+/// This is organised around the concept of a \c Split, which is a whitespace
+/// range that signifies a position of the content of a token where a
+/// reformatting might be done. Operating with splits is divided into 3
+/// operations:
+/// - getSplit, for finding a split starting at a position,
+/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
+/// of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompression, for calculating the size in columns of the
+/// line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+/// whitespace manager; note that the compressed whitespace may be in the
+/// middle of the original line and of the reformatted line.
+///
+/// For tokens where the whitespace before each line needs to be also
+/// reformatted, for example for tokens supporting reflow, there are analogous
+/// operations that might be executed before the main line breaking occurs:
+/// - getSplitBefore, for finding a split such that the content preceding it
+/// needs to be specially reflown,
+/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
+/// of the remainder of the content after the beginning of the content has
+/// been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// manager.
+///
/// FIXME: The interface seems set in stone, so we might want to just pull the
/// strategy into the class, instead of controlling it from the outside.
class BreakableToken {
@@ -42,44 +77,85 @@ public:
virtual unsigned getLineCount() const = 0;
/// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex, from byte offset \p Offset with length \p Length.
+ /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
///
- /// Note that previous breaks are not taken into account. \p Offset is always
- /// specified from the start of the (original) line.
+ /// Note that previous breaks are not taken into account. \p TailOffset is
+ /// always specified from the start of the (original) line.
/// \p Length can be set to StringRef::npos, which means "to the end of line".
virtual unsigned
- getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+ getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
StringRef::size_type Length) const = 0;
/// \brief Returns a range (offset, length) at which to break the line at
/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
/// violate \p ColumnLimit.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const = 0;
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const = 0;
/// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) = 0;
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
+ /// \p Split has been compressed into a single space.
+ unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const;
+
/// \brief Replaces the whitespace range described by \p Split with a single
/// space.
- virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
- Split Split,
- WhitespaceManager &Whitespaces) = 0;
+ virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) = 0;
+
+ /// \brief Returns a whitespace range (offset, length) of the content at
+ /// \p LineIndex such that the content preceding this range needs to be
+ /// reformatted before any breaks are made to this line.
+ ///
+ /// \p PreviousEndColumn is the end column of the previous line after
+ /// formatting.
+ ///
+ /// A result having offset == StringRef::npos means that no piece of the line
+ /// needs to be reformatted before any breaks are made.
+ virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ return Split(StringRef::npos, 0);
+ }
+
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex after the content preceding the whitespace range specified
+ /// \p SplitBefore has been reformatted, but before any breaks are made to
+ /// this line.
+ virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ }
/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+ /// Performs a reformatting of the content at \p LineIndex preceding the
+ /// whitespace range \p SplitBefore.
virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
WhitespaceManager &Whitespaces) {}
+ /// \brief Updates the next token of \p State to the next token after this
+ /// one. This can be used when this token manages a set of underlying tokens
+ /// as a unit and is responsible for the formatting of the them.
+ virtual void updateNextToken(LineState &State) const {}
+
protected:
- BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
- bool InPPDirective, encoding::Encoding Encoding,
- const FormatStyle &Style)
- : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
- Encoding(Encoding), Style(Style) {}
+ BreakableToken(const FormatToken &Tok, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
+ Style(Style) {}
const FormatToken &Tok;
- const unsigned IndentLevel;
const bool InPPDirective;
const encoding::Encoding Encoding;
const FormatStyle &Style;
@@ -95,10 +171,9 @@ public:
StringRef::size_type Length) const override;
protected:
- BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
- unsigned StartColumn, StringRef Prefix,
- StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding,
+ BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
+ bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
// The column in which the token starts.
@@ -117,107 +192,139 @@ public:
///
/// \p StartColumn specifies the column in which the token will start
/// after formatting.
- BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
- unsigned StartColumn, StringRef Prefix,
- StringRef Postfix, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style);
+ BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
- Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const override;
+ Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) override;
- void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override {}
+ void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override {}
};
-class BreakableLineComment : public BreakableSingleLineToken {
-public:
- /// \brief Creates a breakable token for a line comment.
+class BreakableComment : public BreakableToken {
+protected:
+ /// \brief Creates a breakable token for a comment.
///
- /// \p StartColumn specifies the column in which the comment will start
- /// after formatting.
- BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
- unsigned StartColumn, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style);
+ /// \p StartColumn specifies the column in which the comment will start after
+ /// formatting.
+ BreakableComment(const FormatToken &Token, unsigned StartColumn,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
- Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const override;
- void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- void replaceWhitespaceBefore(unsigned LineIndex,
- WhitespaceManager &Whitespaces) override;
+public:
+ unsigned getLineCount() const override;
+ Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
-private:
- // The prefix without an additional space if one was added.
- StringRef OriginalPrefix;
+protected:
+ virtual unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const = 0;
+
+ // Returns a split that divides Text into a left and right parts, such that
+ // the left part is suitable for reflowing after PreviousEndColumn.
+ Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+ unsigned PreviousEndColumn, unsigned ColumnLimit) const;
+
+ // Returns the token containing the line at LineIndex.
+ const FormatToken &tokenAt(unsigned LineIndex) const;
+
+ // Checks if the content of line LineIndex may be reflown with the previous
+ // line.
+ virtual bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const = 0;
+
+ // Contains the original text of the lines of the block comment.
+ //
+ // In case of a block comments, excludes the leading /* in the first line and
+ // trailing */ in the last line. In case of line comments, excludes the
+ // leading // and spaces.
+ SmallVector<StringRef, 16> Lines;
+
+ // Contains the text of the lines excluding all leading and trailing
+ // whitespace between the lines. Note that the decoration (if present) is also
+ // not considered part of the text.
+ SmallVector<StringRef, 16> Content;
+
+ // Tokens[i] contains a reference to the token containing Lines[i] if the
+ // whitespace range before that token is managed by this block.
+ // Otherwise, Tokens[i] is a null pointer.
+ SmallVector<FormatToken *, 16> Tokens;
+
+ // ContentColumn[i] is the target column at which Content[i] should be.
+ // Note that this excludes a leading "* " or "*" in case of block comments
+ // where all lines have a "*" prefix, or the leading "// " or "//" in case of
+ // line comments.
+ //
+ // In block comments, the first line's target column is always positive. The
+ // remaining lines' target columns are relative to the first line to allow
+ // correct indentation of comments in \c WhitespaceManager. Thus they can be
+ // negative as well (in case the first line needs to be unindented more than
+ // there's actual whitespace in another line).
+ SmallVector<int, 16> ContentColumn;
+
+ // The intended start column of the first line of text from this section.
+ unsigned StartColumn;
+
+ // The prefix to use in front a line that has been reflown up.
+ // For example, when reflowing the second line after the first here:
+ // // comment 1
+ // // comment 2
+ // we expect:
+ // // comment 1 comment 2
+ // and not:
+ // // comment 1comment 2
+ StringRef ReflowPrefix = " ";
};
-class BreakableBlockComment : public BreakableToken {
+class BreakableBlockComment : public BreakableComment {
public:
- /// \brief Creates a breakable token for a block comment.
- ///
- /// \p StartColumn specifies the column in which the comment will start
- /// after formatting, while \p OriginalStartColumn specifies in which
- /// column the comment started before formatting.
- /// If the comment starts a line after formatting, set \p FirstInLine to true.
- BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
- unsigned StartColumn, unsigned OriginaStartColumn,
- bool FirstInLine, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style);
+ BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
- unsigned getLineCount() const override;
unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
StringRef::size_type Length) const override;
- Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) override;
- void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- void replaceWhitespaceBefore(unsigned LineIndex,
+ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const override;
+ void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
WhitespaceManager &Whitespaces) override;
+ bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const override;
private:
- // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
- // so that all whitespace between the lines is accounted to Lines[LineIndex]
- // as leading whitespace:
- // - Lines[LineIndex] points to the text after that whitespace
- // - Lines[LineIndex-1] shrinks by its trailing whitespace
- // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
- // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+ // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
//
- // Sets StartOfLineColumn to the intended column in which the text at
+ // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
+ // leading and trailing whitespace.
+ //
+ // Sets ContentColumn to the intended column in which the text at
// Lines[LineIndex] starts (note that the decoration, if present, is not
// considered part of the text).
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
- // Returns the column at which the text in line LineIndex starts, when broken
- // at TailOffset. Note that the decoration (if present) is not considered part
- // of the text.
- unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
-
- // Contains the text of the lines of the block comment, excluding the leading
- // /* in the first line and trailing */ in the last line, and excluding all
- // trailing whitespace between the lines. Note that the decoration (if
- // present) is also not considered part of the text.
- SmallVector<StringRef, 16> Lines;
+ // Computes the end column if the full Content from LineIndex gets reflown
+ // after PreviousEndColumn.
+ unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
+ unsigned PreviousEndColumn) const;
- // LeadingWhitespace[i] is the number of characters regarded as whitespace in
- // front of Lines[i]. Note that this can include "* " sequences, which we
- // regard as whitespace when all lines have a "*" prefix.
- SmallVector<unsigned, 16> LeadingWhitespace;
-
- // StartOfLineColumn[i] is the target column at which Line[i] should be.
- // Note that this excludes a leading "* " or "*" in case all lines have
- // a "*" prefix.
- // The first line's target column is always positive. The remaining lines'
- // target columns are relative to the first line to allow correct indentation
- // of comments in \c WhitespaceManager. Thus they can be negative as well (in
- // case the first line needs to be unindented more than there's actual
- // whitespace in another line).
- SmallVector<int, 16> StartOfLineColumn;
+ unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const override;
// The column at which the text of a broken line should start.
// Note that an optional decoration would go before that column.
@@ -237,8 +344,69 @@ private:
// Either "* " if all lines begin with a "*", or empty.
StringRef Decoration;
+
+ // If this block comment has decorations, this is the column of the start of
+ // the decorations.
+ unsigned DecorationColumn;
};
+class BreakableLineCommentSection : public BreakableComment {
+public:
+ BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+ unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const override;
+ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
+ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const override;
+ void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
+ WhitespaceManager &Whitespaces) override;
+ void updateNextToken(LineState &State) const override;
+ bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const override;
+
+private:
+ unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const override;
+
+ // OriginalPrefix[i] contains the original prefix of line i, including
+ // trailing whitespace before the start of the content. The indentation
+ // preceding the prefix is not included.
+ // For example, if the line is:
+ // // content
+ // then the original prefix is "// ".
+ SmallVector<StringRef, 16> OriginalPrefix;
+
+ // Prefix[i] contains the intended leading "//" with trailing spaces to
+ // account for the indentation of content within the comment at line i after
+ // formatting. It can be different than the original prefix when the original
+ // line starts like this:
+ // //content
+ // Then the original prefix is "//", but the prefix is "// ".
+ SmallVector<StringRef, 16> Prefix;
+
+ SmallVector<unsigned, 16> OriginalContentColumn;
+
+ /// \brief The token to which the last line of this breakable token belongs
+ /// to; nullptr if that token is the initial token.
+ ///
+ /// The distinction is because if the token of the last line of this breakable
+ /// token is distinct from the initial token, this breakable token owns the
+ /// whitespace before the token of the last line, and the whitespace manager
+ /// must be able to modify it.
+ FormatToken *LastLineTok = nullptr;
+};
} // namespace format
} // namespace clang
diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt
index c977c2d3c5fa1..0c7511c1bb07e 100644
--- a/lib/Format/CMakeLists.txt
+++ b/lib/Format/CMakeLists.txt
@@ -3,11 +3,11 @@ set(LLVM_LINK_COMPONENTS support)
add_clang_library(clangFormat
AffectedRangeManager.cpp
BreakableToken.cpp
- Comments.cpp
ContinuationIndenter.cpp
Format.cpp
FormatToken.cpp
FormatTokenLexer.cpp
+ NamespaceEndCommentsFixer.cpp
SortJavaScriptImports.cpp
TokenAnalyzer.cpp
TokenAnnotator.cpp
diff --git a/lib/Format/Comments.cpp b/lib/Format/Comments.cpp
deleted file mode 100644
index 1b27f5b30a603..0000000000000
--- a/lib/Format/Comments.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===--- Comments.cpp - Comment Manipulation -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Implements comment manipulation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Comments.h"
-
-namespace clang {
-namespace format {
-
-StringRef getLineCommentIndentPrefix(StringRef Comment) {
- static const char *const KnownPrefixes[] = {"///", "//", "//!"};
- StringRef LongestPrefix;
- for (StringRef KnownPrefix : KnownPrefixes) {
- if (Comment.startswith(KnownPrefix)) {
- size_t PrefixLength = KnownPrefix.size();
- while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
- ++PrefixLength;
- if (PrefixLength > LongestPrefix.size())
- LongestPrefix = Comment.substr(0, PrefixLength);
- }
- }
- return LongestPrefix;
-}
-
-} // namespace format
-} // namespace clang
diff --git a/lib/Format/Comments.h b/lib/Format/Comments.h
deleted file mode 100644
index 59f0596361a5c..0000000000000
--- a/lib/Format/Comments.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===--- Comments.cpp - Comment manipulation -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Declares comment manipulation functionality.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace clang {
-namespace format {
-
-/// \brief Returns the comment prefix of the line comment \p Comment.
-///
-/// The comment prefix consists of a leading known prefix, like "//" or "///",
-/// together with the following whitespace.
-StringRef getLineCommentIndentPrefix(StringRef Comment);
-
-} // namespace format
-} // namespace clang
-
-#endif
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 6bb6fb3060352..73ae10a29f8fb 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -20,7 +20,7 @@
#include "clang/Format/Format.h"
#include "llvm/Support/Debug.h"
-#define DEBUG_TYPE "format-formatter"
+#define DEBUG_TYPE "format-indenter"
namespace clang {
namespace format {
@@ -57,8 +57,10 @@ static bool startsNextParameter(const FormatToken &Current,
Style.BreakConstructorInitializersBeforeComma)
return true;
return Previous.is(tok::comma) && !Current.isTrailingComment() &&
- (Previous.isNot(TT_CtorInitializerComma) ||
- !Style.BreakConstructorInitializersBeforeComma);
+ ((Previous.isNot(TT_CtorInitializerComma) ||
+ !Style.BreakConstructorInitializersBeforeComma) &&
+ (Previous.isNot(TT_InheritanceComma) ||
+ !Style.BreakBeforeInheritanceComma));
}
ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
@@ -80,7 +82,7 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
State.Column = FirstIndent;
State.Line = Line;
State.NextToken = Line->First;
- State.Stack.push_back(ParenState(FirstIndent, Line->Level, FirstIndent,
+ State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
/*AvoidBinPacking=*/false,
/*NoLineBreak=*/false));
State.LineContainsContinuedForLoopSection = false;
@@ -135,6 +137,12 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
return false;
}
+ // If binary operators are moved to the next line (including commas for some
+ // styles of constructor initializers), that's always ok.
+ if (!Current.isOneOf(TT_BinaryOperator, tok::comma) &&
+ State.Stack.back().NoLineBreakInOperand)
+ return false;
+
return !State.Stack.back().NoLineBreak;
}
@@ -150,7 +158,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
return true;
if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
(Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
- Style.Language == FormatStyle::LK_Cpp &&
+ Style.isCpp() &&
// FIXME: This is a temporary workaround for the case where clang-format
// sets BreakBeforeParameter to avoid bin packing and this creates a
// completely unnecessary line break after a template type that isn't
@@ -191,6 +199,18 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
Current.NestingLevel < State.StartOfLineLevel))
return true;
+ if (startsSegmentOfBuilderTypeCall(Current) &&
+ (State.Stack.back().CallContinuation != 0 ||
+ State.Stack.back().BreakBeforeParameter) &&
+ // JavaScript is treated different here as there is a frequent pattern:
+ // SomeFunction(function() {
+ // ...
+ // }.bind(...));
+ // FIXME: We should find a more generic solution to this problem.
+ !(State.Column <= NewLineColumn &&
+ Style.Language == FormatStyle::LK_JavaScript))
+ return true;
+
if (State.Column <= NewLineColumn)
return false;
@@ -255,11 +275,6 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
!Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter)
return true;
- if (startsSegmentOfBuilderTypeCall(Current) &&
- (State.Stack.back().CallContinuation != 0 ||
- State.Stack.back().BreakBeforeParameter))
- return true;
-
// The following could be precomputed as they do not depend on the state.
// However, as they should take effect only if the UnwrappedLine does not fit
// into the ColumnLimit, they are checked here in the ContinuationIndenter.
@@ -334,8 +349,13 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
if (!DryRun)
- Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, /*IndentLevel=*/0,
- Spaces, State.Column + Spaces);
+ Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
+ State.Column + Spaces);
+
+ // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance
+ // declaration unless there is multiple inheritance.
+ if (Style.BreakBeforeInheritanceComma && Current.is(TT_InheritanceColon))
+ State.Stack.back().NoLineBreak = true;
if (Current.is(TT_SelectorName) &&
!State.Stack.back().ObjCSelectorNameFound) {
@@ -370,6 +390,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
Current.FakeLParens.size() > 0 &&
Current.FakeLParens.back() > prec::Unknown)
State.Stack.back().NoLineBreak = true;
+ if (Previous.is(TT_TemplateString) && Previous.opensScope())
+ State.Stack.back().NoLineBreak = true;
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) &&
@@ -385,7 +407,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
State.Stack.back().NoLineBreak = true;
if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
(Previous.MatchingParen &&
- (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) {
+ (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10)))
// If there is a function call with long parameters, break before trailing
// calls. This prevents things like:
// EXPECT_CALL(SomeLongParameter).Times(
@@ -393,6 +415,31 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// We don't want to do this for short parameters as they can just be
// indexes.
State.Stack.back().NoLineBreak = true;
+
+ // Don't allow the RHS of an operator to be split over multiple lines unless
+ // there is a line-break right after the operator.
+ // Exclude relational operators, as there, it is always more desirable to
+ // have the LHS 'left' of the RHS.
+ const FormatToken *P = Current.getPreviousNonComment();
+ if (!Current.is(tok::comment) && P &&
+ (P->isOneOf(TT_BinaryOperator, tok::comma) ||
+ (P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
+ !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
+ P->getPrecedence() != prec::Assignment &&
+ P->getPrecedence() != prec::Relational) {
+ bool BreakBeforeOperator =
+ P->MustBreakBefore || P->is(tok::lessless) ||
+ (P->is(TT_BinaryOperator) &&
+ Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) ||
+ (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators);
+ // Don't do this if there are only two operands. In these cases, there is
+ // always a nice vertical separation between them and the extra line break
+ // does not help.
+ bool HasTwoOperands =
+ P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr);
+ if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) ||
+ (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator))
+ State.Stack.back().NoLineBreakInOperand = true;
}
State.Column += Spaces;
@@ -540,9 +587,8 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
if (!DryRun) {
unsigned Newlines = std::max(
1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
- Whitespaces.replaceWhitespace(Current, Newlines,
- State.Stack.back().IndentLevel, State.Column,
- State.Column, State.Line->InPPDirective);
+ Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
+ State.Line->InPPDirective);
}
if (!Current.isTrailingComment())
@@ -559,9 +605,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// Any break on this level means that the parent level has been broken
// and we need to avoid bin packing there.
bool NestedBlockSpecialCase =
- Style.Language != FormatStyle::LK_Cpp &&
- Style.Language != FormatStyle::LK_ObjC &&
- Current.is(tok::r_brace) && State.Stack.size() > 1 &&
+ !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
State.Stack[State.Stack.size() - 2].NestedBlockInlined;
if (!NestedBlockSpecialCase)
for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
@@ -580,7 +624,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// If we break after { or the [ of an array initializer, we should also break
// before the corresponding } or ].
if (PreviousNonComment &&
- (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)))
+ (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ (PreviousNonComment->is(TT_TemplateString) &&
+ PreviousNonComment->opensScope())))
State.Stack.back().BreakBeforeClosingBrace = true;
if (State.Stack.back().AvoidBinPacking) {
@@ -628,14 +674,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
return State.Stack[State.Stack.size() - 2].LastSpace;
return State.FirstIndent;
}
+ if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
+ return State.Stack[State.Stack.size() - 2].LastSpace;
if (Current.is(tok::identifier) && Current.Next &&
Current.Next->is(TT_DictLiteral))
return State.Stack.back().Indent;
- if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
- return State.StartOfStringLiteral;
if (NextNonComment->is(TT_ObjCStringLiteral) &&
State.StartOfStringLiteral != 0)
return State.StartOfStringLiteral - 1;
+ if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
+ return State.StartOfStringLiteral;
if (NextNonComment->is(tok::lessless) &&
State.Stack.back().FirstLessLess != 0)
return State.Stack.back().FirstLessLess;
@@ -696,10 +744,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))
return ContinuationIndent;
- if (NextNonComment->is(TT_CtorInitializerColon))
- return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
if (NextNonComment->is(TT_CtorInitializerComma))
return State.Stack.back().Indent;
+ if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon,
+ TT_InheritanceComma))
+ return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() &&
!Current.isOneOf(tok::colon, tok::comment))
return ContinuationIndent;
@@ -716,6 +765,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
assert(State.Stack.size());
const FormatToken &Current = *State.NextToken;
+ if (Current.isOneOf(tok::comma, TT_BinaryOperator))
+ State.Stack.back().NoLineBreakInOperand = false;
if (Current.is(TT_InheritanceColon))
State.Stack.back().AvoidBinPacking = true;
if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) {
@@ -724,8 +775,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
else
State.Stack.back().LastOperatorWrapped = Newline;
}
- if ((Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) ||
- Current.is(TT_ConditionalExpr))
+ if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless))
+ State.Stack.back().LastOperatorWrapped = Newline;
+ if (Current.is(TT_ConditionalExpr) && Current.Previous &&
+ !Current.Previous->is(TT_ConditionalExpr))
State.Stack.back().LastOperatorWrapped = Newline;
if (Current.is(TT_ArraySubscriptLSquare) &&
State.Stack.back().StartOfArraySubscripts == 0)
@@ -765,9 +818,14 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Stack.back().AvoidBinPacking = true;
State.Stack.back().BreakBeforeParameter = false;
}
+ if (Current.is(TT_InheritanceColon))
+ State.Stack.back().Indent =
+ State.FirstIndent + Style.ContinuationIndentWidth;
if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
State.Stack.back().NestedBlockIndent =
State.Column + Current.ColumnWidth + 1;
+ if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow))
+ State.Stack.back().LastSpace = State.Column;
// Insert scopes created by fake parenthesis.
const FormatToken *Previous = Current.getPreviousNonComment();
@@ -795,21 +853,30 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
}
moveStatePastFakeLParens(State, Newline);
- moveStatePastScopeOpener(State, Newline);
moveStatePastScopeCloser(State);
+ if (Current.is(TT_TemplateString) && Current.opensScope())
+ State.Stack.back().LastSpace =
+ (Current.IsMultiline ? Current.LastLineColumnWidth
+ : State.Column + Current.ColumnWidth) -
+ strlen("${");
+ bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
+ !State.Stack.back().NoLineBreakInOperand;
+ moveStatePastScopeOpener(State, Newline);
moveStatePastFakeRParens(State);
- if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
- State.StartOfStringLiteral = State.Column;
if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
State.StartOfStringLiteral = State.Column + 1;
+ else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
+ State.StartOfStringLiteral = State.Column;
else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
!Current.isStringLiteral())
State.StartOfStringLiteral = 0;
State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
- unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
+ unsigned Penalty = 0;
+ if (CanBreakProtrudingToken)
+ Penalty = breakProtrudingToken(Current, State, DryRun);
if (State.Column > getColumnLimit(State)) {
unsigned ExcessCharacters = State.Column - getColumnLimit(State);
Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
@@ -848,6 +915,9 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
I != E; ++I) {
ParenState NewParenState = State.Stack.back();
NewParenState.ContainsLineBreak = false;
+ NewParenState.LastOperatorWrapped = true;
+ NewParenState.NoLineBreak =
+ NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand;
// Indent from 'LastSpace' unless these are fake parentheses encapsulating
// a builder type call after 'return' or, if the alignment after opening
@@ -862,24 +932,6 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
std::max(std::max(State.Column, NewParenState.Indent),
State.Stack.back().LastSpace);
- // Don't allow the RHS of an operator to be split over multiple lines unless
- // there is a line-break right after the operator.
- // Exclude relational operators, as there, it is always more desirable to
- // have the LHS 'left' of the RHS.
- if (Previous && Previous->getPrecedence() != prec::Assignment &&
- Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, tok::comma) &&
- Previous->getPrecedence() != prec::Relational) {
- bool BreakBeforeOperator =
- Previous->is(tok::lessless) ||
- (Previous->is(TT_BinaryOperator) &&
- Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) ||
- (Previous->is(TT_ConditionalExpr) &&
- Style.BreakBeforeTernaryOperators);
- if ((!Newline && !BreakBeforeOperator) ||
- (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator))
- NewParenState.NoLineBreak = true;
- }
-
// Do not indent relative to the fake parentheses inserted for "." or "->".
// This is a special case to make the following to statements consistent:
// OuterFunction(InnerFunctionCall( // break
@@ -931,7 +983,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
}
unsigned NewIndent;
- unsigned NewIndentLevel = State.Stack.back().IndentLevel;
unsigned LastSpace = State.Stack.back().LastSpace;
bool AvoidBinPacking;
bool BreakBeforeParameter = false;
@@ -941,7 +992,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
if (Current.opensBlockOrBlockTypeList(Style)) {
NewIndent = State.Stack.back().NestedBlockIndent + Style.IndentWidth;
NewIndent = std::min(State.Column + 2, NewIndent);
- ++NewIndentLevel;
} else {
NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth;
}
@@ -966,12 +1016,23 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
// int> v);
// FIXME: We likely want to do this for more combinations of brackets.
// Verify that it is wanted for ObjC, too.
- if (Current.Tok.getKind() == tok::less &&
- Current.ParentBracket == tok::l_paren) {
+ if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) {
NewIndent = std::max(NewIndent, State.Stack.back().Indent);
LastSpace = std::max(LastSpace, State.Stack.back().Indent);
}
+ // JavaScript template strings are special as we always want to indent
+ // nested expressions relative to the ${}. Otherwise, this can create quite
+ // a mess.
+ if (Current.is(TT_TemplateString)) {
+ unsigned Column = Current.IsMultiline
+ ? Current.LastLineColumnWidth
+ : State.Column + Current.ColumnWidth;
+ NewIndent = Column;
+ LastSpace = Column;
+ NestedBlockIndent = Column;
+ }
+
AvoidBinPacking =
(State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
(!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
@@ -1003,17 +1064,15 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
// Generally inherit NoLineBreak from the current scope to nested scope.
// However, don't do this for non-empty nested blocks, dict literals and
// array literals as these follow different indentation rules.
- const FormatToken *Previous = Current.getPreviousNonComment();
bool NoLineBreak =
Current.Children.empty() &&
!Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
(State.Stack.back().NoLineBreak ||
+ State.Stack.back().NoLineBreakInOperand ||
(Current.is(TT_TemplateOpener) &&
- State.Stack.back().ContainsUnwrappedBuilder) ||
- (Current.is(tok::l_brace) && !Newline && Previous &&
- Previous->is(tok::comma)));
- State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace,
- AvoidBinPacking, NoLineBreak));
+ State.Stack.back().ContainsUnwrappedBuilder));
+ State.Stack.push_back(
+ ParenState(NewIndent, LastSpace, AvoidBinPacking, NoLineBreak));
State.Stack.back().NestedBlockIndent = NestedBlockIndent;
State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1;
@@ -1027,7 +1086,7 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
// If we encounter a closing ), ], } or >, we can remove a level from our
// stacks.
if (State.Stack.size() > 1 &&
- (Current.isOneOf(tok::r_paren, tok::r_square) ||
+ (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) ||
(Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
State.NextToken->is(TT_TemplateCloser)))
State.Stack.pop_back();
@@ -1047,10 +1106,9 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace)
? Style.ObjCBlockIndentWidth
: Style.IndentWidth);
- State.Stack.push_back(ParenState(
- NewIndent, /*NewIndentLevel=*/State.Stack.back().IndentLevel + 1,
- State.Stack.back().LastSpace, /*AvoidBinPacking=*/true,
- /*NoLineBreak=*/false));
+ State.Stack.push_back(ParenState(NewIndent, State.Stack.back().LastSpace,
+ /*AvoidBinPacking=*/true,
+ /*NoLineBreak=*/false));
State.Stack.back().NestedBlockIndent = NestedBlockIndent;
State.Stack.back().BreakBeforeParameter = true;
}
@@ -1117,44 +1175,42 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
StringRef Text = Current.TokenText;
StringRef Prefix;
StringRef Postfix;
- bool IsNSStringLiteral = false;
// FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
// FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
// reduce the overhead) for each FormatToken, which is a string, so that we
// don't run multiple checks here on the hot path.
- if (Text.startswith("\"") && Current.Previous &&
- Current.Previous->is(tok::at)) {
- IsNSStringLiteral = true;
- Prefix = "@\"";
- }
if ((Text.endswith(Postfix = "\"") &&
- (IsNSStringLiteral || Text.startswith(Prefix = "\"") ||
+ (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") ||
Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") ||
Text.startswith(Prefix = "u8\"") ||
Text.startswith(Prefix = "L\""))) ||
(Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
- Token.reset(new BreakableStringLiteral(
- Current, State.Line->Level, StartColumn, Prefix, Postfix,
- State.Line->InPPDirective, Encoding, Style));
+ Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
+ Postfix, State.Line->InPPDirective,
+ Encoding, Style));
} else {
return 0;
}
} else if (Current.is(TT_BlockComment)) {
if (!Current.isTrailingComment() || !Style.ReflowComments ||
- CommentPragmasRegex.match(Current.TokenText.substr(2)))
+ // If a comment token switches formatting, like
+ // /* clang-format on */, we don't want to break it further,
+ // but we may still want to adjust its indentation.
+ switchesFormatting(Current))
return addMultilineToken(Current, State);
Token.reset(new BreakableBlockComment(
- Current, State.Line->Level, StartColumn, Current.OriginalColumn,
- !Current.Previous, State.Line->InPPDirective, Encoding, Style));
+ Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+ State.Line->InPPDirective, Encoding, Style));
} else if (Current.is(TT_LineComment) &&
(Current.Previous == nullptr ||
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
if (!Style.ReflowComments ||
- CommentPragmasRegex.match(Current.TokenText.substr(2)))
+ CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+ switchesFormatting(Current))
return 0;
- Token.reset(new BreakableLineComment(Current, State.Line->Level,
- StartColumn, /*InPPDirective=*/false,
- Encoding, Style));
+ Token.reset(new BreakableLineCommentSection(
+ Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+ /*InPPDirective=*/false, Encoding, Style));
// We don't insert backslashes when breaking line comments.
ColumnLimit = Style.ColumnLimit;
} else {
@@ -1165,18 +1221,30 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
bool BreakInserted = false;
+ // We use a conservative reflowing strategy. Reflow starts after a line is
+ // broken or the corresponding whitespace compressed. Reflow ends as soon as a
+ // line that doesn't get reflown with the previous line is reached.
+ bool ReflowInProgress = false;
unsigned Penalty = 0;
unsigned RemainingTokenColumns = 0;
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
LineIndex != EndIndex; ++LineIndex) {
+ BreakableToken::Split SplitBefore(StringRef::npos, 0);
+ if (ReflowInProgress) {
+ SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
+ RemainingSpace, CommentPragmasRegex);
+ }
+ ReflowInProgress = SplitBefore.first != StringRef::npos;
+ unsigned TailOffset =
+ ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
if (!DryRun)
- Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
- unsigned TailOffset = 0;
- RemainingTokenColumns =
- Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
+ RemainingSpace, SplitBefore, Whitespaces);
+ RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
+ LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
while (RemainingTokenColumns > RemainingSpace) {
- BreakableToken::Split Split =
- Token->getSplit(LineIndex, TailOffset, ColumnLimit);
+ BreakableToken::Split Split = Token->getSplit(
+ LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
if (Split.first == StringRef::npos) {
// The last line's penalty is handled in addNextStateToQueue().
if (LineIndex < EndIndex - 1)
@@ -1185,17 +1253,23 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
break;
}
assert(Split.first != 0);
- unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
- LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
- // We can remove extra whitespace instead of breaking the line.
- if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) {
- RemainingTokenColumns = 0;
+ // Check if compressing the whitespace range will bring the line length
+ // under the limit. If that is the case, we perform whitespace compression
+ // instead of inserting a line break.
+ unsigned RemainingTokenColumnsAfterCompression =
+ Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
+ if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
+ RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
+ ReflowInProgress = true;
if (!DryRun)
- Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces);
+ Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
break;
}
+ unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
+ LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
@@ -1213,6 +1287,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
}
TailOffset += Split.first + Split.second;
RemainingTokenColumns = NewRemainingTokenColumns;
+ ReflowInProgress = true;
BreakInserted = true;
}
}
@@ -1233,6 +1308,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
State.Stack.back().LastSpace = StartColumn;
}
+
+ Token->updateNextToken(State);
+
return Penalty;
}
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
index 21ad653c4fa4c..9a06aa6f62672 100644
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h
@@ -146,12 +146,12 @@ private:
};
struct ParenState {
- ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
- bool AvoidBinPacking, bool NoLineBreak)
- : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
- NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
- AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
- NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
+ ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
+ bool NoLineBreak)
+ : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
+ BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
+ BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
+ NoLineBreakInOperand(false), LastOperatorWrapped(true),
ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
AlignColons(true), ObjCSelectorNameFound(false),
HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
@@ -160,9 +160,6 @@ struct ParenState {
/// indented.
unsigned Indent;
- /// \brief The number of indentation levels of the block.
- unsigned IndentLevel;
-
/// \brief The position of the last space on each level.
///
/// Used e.g. to break like:
@@ -224,6 +221,10 @@ struct ParenState {
/// \brief Line breaking in this context would break a formatting rule.
bool NoLineBreak : 1;
+ /// \brief Same as \c NoLineBreak, but is restricted until the end of the
+ /// operand (including the next ",").
+ bool NoLineBreakInOperand : 1;
+
/// \brief True if the last binary operator on this level was wrapped to the
/// next line.
bool LastOperatorWrapped : 1;
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 389761d482498..0e2da71343d5d 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -17,6 +17,7 @@
#include "AffectedRangeManager.h"
#include "ContinuationIndenter.h"
#include "FormatTokenLexer.h"
+#include "NamespaceEndCommentsFixer.h"
#include "SortJavaScriptImports.h"
#include "TokenAnalyzer.h"
#include "TokenAnnotator.h"
@@ -297,6 +298,8 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
IO.mapOptional("ColumnLimit", Style.ColumnLimit);
IO.mapOptional("CommentPragmas", Style.CommentPragmas);
+ IO.mapOptional("BreakBeforeInheritanceComma",
+ Style.BreakBeforeInheritanceComma);
IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
IO.mapOptional("ConstructorInitializerIndentWidth",
@@ -307,6 +310,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("DisableFormat", Style.DisableFormat);
IO.mapOptional("ExperimentalAutoDetectBinPacking",
Style.ExperimentalAutoDetectBinPacking);
+ IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
IO.mapOptional("ForEachMacros", Style.ForEachMacros);
IO.mapOptional("IncludeCategories", Style.IncludeCategories);
IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
@@ -421,6 +425,11 @@ std::error_code make_error_code(ParseError e) {
return std::error_code(static_cast<int>(e), getParseCategory());
}
+inline llvm::Error make_string_error(const llvm::Twine &Message) {
+ return llvm::make_error<llvm::StringError>(Message,
+ llvm::inconvertibleErrorCode());
+}
+
const char *ParseErrorCategory::name() const noexcept {
return "clang-format.parse_error";
}
@@ -514,6 +523,7 @@ FormatStyle getLLVMStyle() {
false, false, false, false, false};
LLVMStyle.BreakAfterJavaFieldAnnotations = false;
LLVMStyle.BreakConstructorInitializersBeforeComma = false;
+ LLVMStyle.BreakBeforeInheritanceComma = false;
LLVMStyle.BreakStringLiterals = true;
LLVMStyle.ColumnLimit = 80;
LLVMStyle.CommentPragmas = "^ IWYU pragma:";
@@ -523,6 +533,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.Cpp11BracedListStyle = true;
LLVMStyle.DerivePointerAlignment = false;
LLVMStyle.ExperimentalAutoDetectBinPacking = false;
+ LLVMStyle.FixNamespaceComments = true;
LLVMStyle.ForEachMacros.push_back("foreach");
LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
@@ -546,7 +557,6 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpacesBeforeTrailingComments = 1;
LLVMStyle.Standard = FormatStyle::LS_Cpp11;
LLVMStyle.UseTab = FormatStyle::UT_Never;
- LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
LLVMStyle.ReflowComments = true;
LLVMStyle.SpacesInParentheses = false;
LLVMStyle.SpacesInSquareBrackets = false;
@@ -614,8 +624,9 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
GoogleStyle.BreakBeforeTernaryOperators = false;
- GoogleStyle.CommentPragmas =
- "(taze:|@(export|requirecss|return|returns|see|visibility)) ";
+ // taze:, @tag followed by { for a lot of JSDoc tags, and @see, which is
+ // commonly followed by overlong URLs.
+ GoogleStyle.CommentPragmas = "(taze:|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)";
GoogleStyle.MaxEmptyLinesToKeep = 3;
GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
GoogleStyle.SpacesInContainerLiterals = false;
@@ -648,8 +659,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
ChromiumStyle.AllowShortLoopsOnASingleLine = false;
ChromiumStyle.BinPackParameters = false;
ChromiumStyle.DerivePointerAlignment = false;
+ if (Language == FormatStyle::LK_ObjC)
+ ChromiumStyle.ColumnLimit = 80;
}
- ChromiumStyle.SortIncludes = false;
return ChromiumStyle;
}
@@ -666,9 +678,11 @@ FormatStyle getMozillaStyle() {
MozillaStyle.BinPackArguments = false;
MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
MozillaStyle.BreakConstructorInitializersBeforeComma = true;
+ MozillaStyle.BreakBeforeInheritanceComma = true;
MozillaStyle.ConstructorInitializerIndentWidth = 2;
MozillaStyle.ContinuationIndentWidth = 2;
MozillaStyle.Cpp11BracedListStyle = false;
+ MozillaStyle.FixNamespaceComments = false;
MozillaStyle.IndentCaseLabels = true;
MozillaStyle.ObjCSpaceAfterProperty = true;
MozillaStyle.ObjCSpaceBeforeProtocolList = false;
@@ -689,6 +703,7 @@ FormatStyle getWebKitStyle() {
Style.BreakConstructorInitializersBeforeComma = true;
Style.Cpp11BracedListStyle = false;
Style.ColumnLimit = 0;
+ Style.FixNamespaceComments = false;
Style.IndentWidth = 4;
Style.NamespaceIndentation = FormatStyle::NI_Inner;
Style.ObjCBlockIndentWidth = 4;
@@ -706,6 +721,7 @@ FormatStyle getGNUStyle() {
Style.BreakBeforeTernaryOperators = true;
Style.Cpp11BracedListStyle = false;
Style.ColumnLimit = 79;
+ Style.FixNamespaceComments = false;
Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
Style.Standard = FormatStyle::LS_Cpp03;
return Style;
@@ -1457,12 +1473,22 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
return Replaces;
}
+bool isMpegTS(StringRef Code) {
+ // MPEG transport streams use the ".ts" file extension. clang-format should
+ // not attempt to format those. MPEG TS' frame format starts with 0x47 every
+ // 189 bytes - detect that and return.
+ return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47;
+}
+
tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName, unsigned *Cursor) {
tooling::Replacements Replaces;
if (!Style.SortIncludes)
return Replaces;
+ if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript &&
+ isMpegTS(Code))
+ return Replaces;
if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
return sortJavaScriptImports(Style, Code, Ranges, FileName);
sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
@@ -1531,8 +1557,8 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
// tokens and returns an offset after the sequence.
unsigned getOffsetAfterTokenSequence(
StringRef FileName, StringRef Code, const FormatStyle &Style,
- std::function<unsigned(const SourceManager &, Lexer &, Token &)>
- GetOffsetAfterSequense) {
+ llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
+ GetOffsetAfterSequence) {
std::unique_ptr<Environment> Env =
Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
const SourceManager &SourceMgr = Env->getSourceManager();
@@ -1541,7 +1567,7 @@ unsigned getOffsetAfterTokenSequence(
Token Tok;
// Get the first token.
Lex.LexFromRawLexer(Tok);
- return GetOffsetAfterSequense(SourceMgr, Lex, Tok);
+ return GetOffsetAfterSequence(SourceMgr, Lex, Tok);
}
// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
@@ -1645,7 +1671,7 @@ bool isDeletedHeader(llvm::StringRef HeaderName,
tooling::Replacements
fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
const FormatStyle &Style) {
- if (Style.Language != FormatStyle::LanguageKind::LK_Cpp)
+ if (!Style.isCpp())
return Replaces;
tooling::Replacements HeaderInsertions;
@@ -1808,23 +1834,36 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
FormatStyle Expanded = expandPresets(Style);
if (Expanded.DisableFormat)
return tooling::Replacements();
-
+ if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
+ return tooling::Replacements();
auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
- if (Style.Language == FormatStyle::LK_JavaScript &&
- Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) {
- JavaScriptRequoter Requoter(*Env, Expanded);
- tooling::Replacements Requotes = Requoter.process();
- if (!Requotes.empty()) {
- auto NewCode = applyAllReplacements(Code, Requotes);
+ auto reformatAfterApplying = [&] (TokenAnalyzer& Fixer) {
+ tooling::Replacements Fixes = Fixer.process();
+ if (!Fixes.empty()) {
+ auto NewCode = applyAllReplacements(Code, Fixes);
if (NewCode) {
auto NewEnv = Environment::CreateVirtualEnvironment(
*NewCode, FileName,
- tooling::calculateRangesAfterReplacements(Requotes, Ranges));
+ tooling::calculateRangesAfterReplacements(Fixes, Ranges));
Formatter Format(*NewEnv, Expanded, IncompleteFormat);
- return Requotes.merge(Format.process());
+ return Fixes.merge(Format.process());
}
}
+ Formatter Format(*Env, Expanded, IncompleteFormat);
+ return Format.process();
+ };
+
+ if (Style.Language == FormatStyle::LK_Cpp &&
+ Style.FixNamespaceComments) {
+ NamespaceEndCommentsFixer CommentsFixer(*Env, Expanded);
+ return reformatAfterApplying(CommentsFixer);
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) {
+ JavaScriptRequoter Requoter(*Env, Expanded);
+ return reformatAfterApplying(Requoter);
}
Formatter Format(*Env, Expanded, IncompleteFormat);
@@ -1840,13 +1879,24 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
return Clean.process();
}
+tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ NamespaceEndCommentsFixer Fix(*Env, Style);
+ return Fix.process();
+}
+
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOptions LangOpts;
LangOpts.CPlusPlus = 1;
LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.LineComment = 1;
- bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
+ bool AlternativeOperators = Style.isCpp();
LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
LangOpts.Bool = 1;
LangOpts.ObjC1 = 1;
@@ -1882,9 +1932,9 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
return FormatStyle::LK_Cpp;
}
-FormatStyle getStyle(StringRef StyleName, StringRef FileName,
- StringRef FallbackStyle, StringRef Code,
- vfs::FileSystem *FS) {
+llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
+ StringRef FallbackStyleName,
+ StringRef Code, vfs::FileSystem *FS) {
if (!FS) {
FS = vfs::getRealFileSystem().get();
}
@@ -1898,35 +1948,28 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName,
(Code.contains("\n- (") || Code.contains("\n+ (")))
Style.Language = FormatStyle::LK_ObjC;
- if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
- llvm::errs() << "Invalid fallback style \"" << FallbackStyle
- << "\" using LLVM style\n";
- return Style;
- }
+ FormatStyle FallbackStyle = getNoStyle();
+ if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
+ return make_string_error("Invalid fallback style \"" + FallbackStyleName);
if (StyleName.startswith("{")) {
// Parse YAML/JSON style from the command line.
- if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
- llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
- << FallbackStyle << " style\n";
- }
+ if (std::error_code ec = parseConfiguration(StyleName, &Style))
+ return make_string_error("Error parsing -style: " + ec.message());
return Style;
}
if (!StyleName.equals_lower("file")) {
if (!getPredefinedStyle(StyleName, Style.Language, &Style))
- llvm::errs() << "Invalid value for -style, using " << FallbackStyle
- << " style\n";
+ return make_string_error("Invalid value for -style");
return Style;
}
// Look for .clang-format/_clang-format file in the file's parent directories.
SmallString<128> UnsuitableConfigFiles;
SmallString<128> Path(FileName);
- if (std::error_code EC = FS->makeAbsolute(Path)) {
- llvm::errs() << EC.message() << "\n";
- return Style;
- }
+ if (std::error_code EC = FS->makeAbsolute(Path))
+ return make_string_error(EC.message());
for (StringRef Directory = Path; !Directory.empty();
Directory = llvm::sys::path::parent_path(Directory)) {
@@ -1943,25 +1986,23 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName,
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
Status = FS->status(ConfigFile.str());
- bool IsFile =
+ bool FoundConfigFile =
Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
- if (!IsFile) {
+ if (!FoundConfigFile) {
// Try _clang-format too, since dotfiles are not commonly used on Windows.
ConfigFile = Directory;
llvm::sys::path::append(ConfigFile, "_clang-format");
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
Status = FS->status(ConfigFile.str());
- IsFile = Status &&
- (Status->getType() == llvm::sys::fs::file_type::regular_file);
+ FoundConfigFile = Status && (Status->getType() ==
+ llvm::sys::fs::file_type::regular_file);
}
- if (IsFile) {
+ if (FoundConfigFile) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
FS->getBufferForFile(ConfigFile.str());
- if (std::error_code EC = Text.getError()) {
- llvm::errs() << EC.message() << "\n";
- break;
- }
+ if (std::error_code EC = Text.getError())
+ return make_string_error(EC.message());
if (std::error_code ec =
parseConfiguration(Text.get()->getBuffer(), &Style)) {
if (ec == ParseError::Unsuitable) {
@@ -1970,20 +2011,18 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName,
UnsuitableConfigFiles.append(ConfigFile);
continue;
}
- llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
- << "\n";
- break;
+ return make_string_error("Error reading " + ConfigFile + ": " +
+ ec.message());
}
DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
return Style;
}
}
- if (!UnsuitableConfigFiles.empty()) {
- llvm::errs() << "Configuration file(s) do(es) not support "
- << getLanguageName(Style.Language) << ": "
- << UnsuitableConfigFiles << "\n";
- }
- return Style;
+ if (!UnsuitableConfigFiles.empty())
+ return make_string_error("Configuration file(s) do(es) not support " +
+ getLanguageName(Style.Language) + ": " +
+ UnsuitableConfigFiles);
+ return FallbackStyle;
}
} // namespace format
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index ea3bbe368d5b0..c9649126d93f5 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -48,11 +48,13 @@ namespace format {
TYPE(FunctionTypeLParen) \
TYPE(ImplicitStringLiteral) \
TYPE(InheritanceColon) \
+ TYPE(InheritanceComma) \
TYPE(InlineASMBrace) \
TYPE(InlineASMColon) \
TYPE(JavaAnnotation) \
TYPE(JsComputedPropertyName) \
TYPE(JsFatArrow) \
+ TYPE(JsNonNullAssertion) \
TYPE(JsTypeColon) \
TYPE(JsTypeOperator) \
TYPE(JsTypeOptionalQuestion) \
@@ -220,6 +222,9 @@ struct FormatToken {
/// [], {} or <>.
unsigned NestingLevel = 0;
+ /// \brief The indent level of this token. Copied from the surrounding line.
+ unsigned IndentLevel = 0;
+
/// \brief Penalty for inserting a line break before this token.
unsigned SplitPenalty = 0;
@@ -258,6 +263,11 @@ struct FormatToken {
/// Only set if \c Type == \c TT_StartOfName.
bool PartOfMultiVariableDeclStmt = false;
+ /// \brief Does this line comment continue a line comment section?
+ ///
+ /// Only set to true if \c Type == \c TT_LineComment.
+ bool ContinuesLineCommentSection = false;
+
/// \brief If this is a bracket, this points to the matching one.
FormatToken *MatchingParen = nullptr;
@@ -334,11 +344,15 @@ struct FormatToken {
/// \brief Returns whether \p Tok is ([{ or a template opening <.
bool opensScope() const {
+ if (is(TT_TemplateString) && TokenText.endswith("${"))
+ return true;
return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
TT_TemplateOpener);
}
/// \brief Returns whether \p Tok is )]} or a template closing >.
bool closesScope() const {
+ if (is(TT_TemplateString) && TokenText.startswith("}"))
+ return true;
return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
TT_TemplateCloser);
}
@@ -443,6 +457,8 @@ struct FormatToken {
/// \brief Returns \c true if this tokens starts a block-type list, i.e. a
/// list that should be indented with a block indent.
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
+ if (is(TT_TemplateString) && opensScope())
+ return true;
return is(TT_ArrayInitializerLSquare) ||
(is(tok::l_brace) &&
(BlockKind == BK_Block || is(TT_DictLiteral) ||
@@ -451,6 +467,8 @@ struct FormatToken {
/// \brief Same as opensBlockOrBlockTypeList, but for the closing token.
bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
+ if (is(TT_TemplateString) && closesScope())
+ return true;
return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
}
@@ -618,6 +636,8 @@ struct AdditionalKeywords {
kw_synchronized = &IdentTable.get("synchronized");
kw_throws = &IdentTable.get("throws");
kw___except = &IdentTable.get("__except");
+ kw___has_include = &IdentTable.get("__has_include");
+ kw___has_include_next = &IdentTable.get("__has_include_next");
kw_mark = &IdentTable.get("mark");
@@ -644,6 +664,8 @@ struct AdditionalKeywords {
IdentifierInfo *kw_NS_ENUM;
IdentifierInfo *kw_NS_OPTIONS;
IdentifierInfo *kw___except;
+ IdentifierInfo *kw___has_include;
+ IdentifierInfo *kw___has_include_next;
// JavaScript keywords.
IdentifierInfo *kw_as;
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp
index 46a32a917dd93..4ee43d6937e00 100644
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp
@@ -64,6 +64,8 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
if (tryMergeLessLess())
return;
+ if (tryMergeNSStringLiteral())
+ return;
if (Style.Language == FormatStyle::LK_JavaScript) {
static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
@@ -82,6 +84,35 @@ void FormatTokenLexer::tryMergePreviousTokens() {
if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
return;
}
+
+ if (Style.Language == FormatStyle::LK_Java) {
+ static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,
+ tok::greater,
+ tok::greater};
+ static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,
+ tok::greater,
+ tok::greaterequal};
+ if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
+ return;
+ }
+}
+
+bool FormatTokenLexer::tryMergeNSStringLiteral() {
+ if (Tokens.size() < 2)
+ return false;
+ auto &At = *(Tokens.end() - 2);
+ auto &String = *(Tokens.end() - 1);
+ if (!At->is(tok::at) || !String->is(tok::string_literal))
+ return false;
+ At->Tok.setKind(tok::string_literal);
+ At->TokenText = StringRef(At->TokenText.begin(),
+ String->TokenText.end() - At->TokenText.begin());
+ At->ColumnWidth += String->ColumnWidth;
+ At->Type = TT_ObjCStringLiteral;
+ Tokens.erase(Tokens.end() - 1);
+ return true;
}
bool FormatTokenLexer::tryMergeLessLess() {
@@ -157,7 +188,9 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
// postfix unary operators. If the '++' is followed by a non-operand
// introducing token, the slash here is the operand and not the start of a
// regex.
- if (Prev->isOneOf(tok::plusplus, tok::minusminus))
+ // `!` is an unary prefix operator, but also a post-fix operator that casts
+ // away nullability, so the same check applies.
+ if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
// The previous token must introduce an operand location where regex
@@ -558,8 +591,7 @@ FormatToken *FormatTokenLexer::getNextToken() {
Column = FormatTok->LastLineColumnWidth;
}
- if (Style.Language == FormatStyle::LK_Cpp ||
- Style.Language == FormatStyle::LK_ObjC) {
+ if (Style.isCpp()) {
if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
tok::pp_define) &&
diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h
index c47b0e725d366..bf10f09cd11e1 100644
--- a/lib/Format/FormatTokenLexer.h
+++ b/lib/Format/FormatTokenLexer.h
@@ -47,6 +47,7 @@ private:
void tryMergePreviousTokens();
bool tryMergeLessLess();
+ bool tryMergeNSStringLiteral();
bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp
new file mode 100644
index 0000000000000..88cf123c18990
--- /dev/null
+++ b/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -0,0 +1,175 @@
+//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
+/// fixes namespace end comments.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NamespaceEndCommentsFixer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
+
+#define DEBUG_TYPE "namespace-end-comments-fixer"
+
+namespace clang {
+namespace format {
+
+namespace {
+// The maximal number of unwrapped lines that a short namespace spans.
+// Short namespaces don't need an end comment.
+static const int kShortNamespaceMaxLines = 1;
+
+// Matches a valid namespace end comment.
+// Valid namespace end comments don't need to be edited.
+static llvm::Regex kNamespaceCommentPattern =
+ llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
+ "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
+ llvm::Regex::IgnoreCase);
+
+// Computes the name of a namespace given the namespace token.
+// Returns "" for anonymous namespace.
+std::string computeName(const FormatToken *NamespaceTok) {
+ assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) &&
+ "expecting a namespace token");
+ std::string name = "";
+ // Collects all the non-comment tokens between 'namespace' and '{'.
+ const FormatToken *Tok = NamespaceTok->getNextNonComment();
+ while (Tok && !Tok->is(tok::l_brace)) {
+ name += Tok->TokenText;
+ Tok = Tok->getNextNonComment();
+ }
+ return name;
+}
+
+std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) {
+ std::string text = "// namespace";
+ if (!NamespaceName.empty()) {
+ text += ' ';
+ text += NamespaceName;
+ }
+ if (AddNewline)
+ text += '\n';
+ return text;
+}
+
+bool hasEndComment(const FormatToken *RBraceTok) {
+ return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
+}
+
+bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) {
+ assert(hasEndComment(RBraceTok));
+ const FormatToken *Comment = RBraceTok->Next;
+ SmallVector<StringRef, 7> Groups;
+ if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
+ StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
+ // Anonymous namespace comments must not mention a namespace name.
+ if (NamespaceName.empty() && !NamespaceNameInComment.empty())
+ return false;
+ StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
+ // Named namespace comments must not mention anonymous namespace.
+ if (!NamespaceName.empty() && !AnonymousInComment.empty())
+ return false;
+ return NamespaceNameInComment == NamespaceName;
+ }
+ return false;
+}
+
+void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
+ const SourceManager &SourceMgr,
+ tooling::Replacements *Fixes) {
+ auto EndLoc = RBraceTok->Tok.getEndLoc();
+ auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
+ if (Err) {
+ llvm::errs() << "Error while adding namespace end comment: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+}
+
+void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
+ const SourceManager &SourceMgr,
+ tooling::Replacements *Fixes) {
+ assert(hasEndComment(RBraceTok));
+ const FormatToken *Comment = RBraceTok->Next;
+ auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
+ Comment->Tok.getEndLoc());
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
+ if (Err) {
+ llvm::errs() << "Error while updating namespace end comment: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+}
+} // namespace
+
+NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
+ const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style) {}
+
+tooling::Replacements NamespaceEndCommentsFixer::analyze(
+ TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) {
+ const SourceManager &SourceMgr = Env.getSourceManager();
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ tooling::Replacements Fixes;
+ for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+ if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||
+ !AnnotatedLines[I]->startsWith(tok::r_brace))
+ continue;
+ const AnnotatedLine *EndLine = AnnotatedLines[I];
+ size_t StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
+ if (StartLineIndex == UnwrappedLine::kInvalidIndex)
+ continue;
+ assert(StartLineIndex < E);
+ const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
+ // Detect "(inline)? namespace" in the beginning of a line.
+ if (NamespaceTok->is(tok::kw_inline))
+ NamespaceTok = NamespaceTok->getNextNonComment();
+ if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace))
+ continue;
+ FormatToken *RBraceTok = EndLine->First;
+ if (RBraceTok->Finalized)
+ continue;
+ RBraceTok->Finalized = true;
+ const FormatToken *EndCommentPrevTok = RBraceTok;
+ // Namespaces often end with '};'. In that case, attach namespace end
+ // comments to the semicolon tokens.
+ if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
+ EndCommentPrevTok = RBraceTok->Next;
+ }
+ // The next token in the token stream after the place where the end comment
+ // token must be. This is either the next token on the current line or the
+ // first token on the next line.
+ const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
+ if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
+ EndCommentNextTok = EndCommentNextTok->Next;
+ if (!EndCommentNextTok && I + 1 < E)
+ EndCommentNextTok = AnnotatedLines[I + 1]->First;
+ bool AddNewline = EndCommentNextTok &&
+ EndCommentNextTok->NewlinesBefore == 0 &&
+ EndCommentNextTok->isNot(tok::eof);
+ const std::string NamespaceName = computeName(NamespaceTok);
+ const std::string EndCommentText =
+ computeEndCommentText(NamespaceName, AddNewline);
+ if (!hasEndComment(EndCommentPrevTok)) {
+ bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
+ if (!isShort)
+ addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
+ continue;
+ }
+ if (!validEndComment(EndCommentPrevTok, NamespaceName))
+ updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
+ }
+ return Fixes;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h
new file mode 100644
index 0000000000000..7790668a2e829
--- /dev/null
+++ b/lib/Format/NamespaceEndCommentsFixer.h
@@ -0,0 +1,37 @@
+//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that
+/// fixes namespace end comments.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class NamespaceEndCommentsFixer : public TokenAnalyzer {
+public:
+ NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index b5f7de280acd7..004800fc2a4e0 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -311,14 +311,13 @@ private:
// In C++, this can happen either in array of templates (foo<int>[10])
// or when array is a nested template type (unique_ptr<type1<type2>[]>).
bool CppArrayTemplates =
- Style.Language == FormatStyle::LK_Cpp && Parent &&
+ Style.isCpp() && Parent &&
Parent->is(TT_TemplateCloser) &&
(Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
Contexts.back().InTemplateArgument);
bool StartsObjCMethodExpr =
- !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp ||
- Style.Language == FormatStyle::LK_ObjC) &&
+ !CppArrayTemplates && Style.isCpp() &&
Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
CurrentToken->isNot(tok::l_brace) &&
(!Parent ||
@@ -337,6 +336,9 @@ private:
Contexts.back().ContextKind == tok::l_brace &&
Parent->isOneOf(tok::l_brace, tok::comma)) {
Left->Type = TT_JsComputedPropertyName;
+ } else if (CurrentToken->is(tok::r_square) && Parent &&
+ Parent->is(TT_TemplateCloser)) {
+ Left->Type = TT_ArraySubscriptLSquare;
} else if (Style.Language == FormatStyle::LK_Proto ||
(!CppArrayTemplates && Parent &&
Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
@@ -433,9 +435,7 @@ private:
if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
FormatToken *Previous = CurrentToken->getPreviousNonComment();
if (((CurrentToken->is(tok::colon) &&
- (!Contexts.back().ColonIsDictLiteral ||
- (Style.Language != FormatStyle::LK_Cpp &&
- Style.Language != FormatStyle::LK_ObjC))) ||
+ (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
Style.Language == FormatStyle::LK_Proto) &&
(Previous->Tok.getIdentifierInfo() ||
Previous->is(tok::string_literal)))
@@ -676,6 +676,8 @@ private:
case tok::comma:
if (Contexts.back().InCtorInitializer)
Tok->Type = TT_CtorInitializerComma;
+ else if (Contexts.back().InInheritanceList)
+ Tok->Type = TT_InheritanceComma;
else if (Contexts.back().FirstStartOfName &&
(Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
@@ -684,6 +686,12 @@ private:
if (Contexts.back().IsForEachMacro)
Contexts.back().IsExpression = true;
break;
+ case tok::identifier:
+ if (Tok->isOneOf(Keywords.kw___has_include,
+ Keywords.kw___has_include_next)) {
+ parseHasInclude();
+ }
+ break;
default:
break;
}
@@ -727,6 +735,14 @@ private:
}
}
+ void parseHasInclude() {
+ if (!CurrentToken || !CurrentToken->is(tok::l_paren))
+ return;
+ next(); // '('
+ parseIncludeDirective();
+ next(); // ')'
+ }
+
LineType parsePreprocessorDirective() {
bool IsFirstToken = CurrentToken->IsFirst;
LineType Type = LT_PreprocessorDirective;
@@ -777,8 +793,14 @@ private:
default:
break;
}
- while (CurrentToken)
+ while (CurrentToken) {
+ FormatToken *Tok = CurrentToken;
next();
+ if (Tok->isOneOf(Keywords.kw___has_include,
+ Keywords.kw___has_include_next)) {
+ parseHasInclude();
+ }
+ }
return Type;
}
@@ -885,7 +907,7 @@ private:
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
TT_OverloadedOperator, TT_RegexLiteral,
- TT_TemplateString))
+ TT_TemplateString, TT_ObjCStringLiteral))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
@@ -925,6 +947,7 @@ private:
bool CanBeExpression = true;
bool InTemplateArgument = false;
bool InCtorInitializer = false;
+ bool InInheritanceList = false;
bool CaretFound = false;
bool IsForEachMacro = false;
};
@@ -984,6 +1007,9 @@ private:
Current.Previous->is(TT_CtorInitializerColon)) {
Contexts.back().IsExpression = true;
Contexts.back().InCtorInitializer = true;
+ } else if (Current.Previous &&
+ Current.Previous->is(TT_InheritanceColon)) {
+ Contexts.back().InInheritanceList = true;
} else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
for (FormatToken *Previous = Current.Previous;
Previous && Previous->isOneOf(tok::star, tok::amp);
@@ -1004,6 +1030,23 @@ private:
// The token type is already known.
return;
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Current.is(tok::exclaim)) {
+ if (Current.Previous &&
+ (Current.Previous->isOneOf(tok::identifier, tok::r_paren,
+ tok::r_square, tok::r_brace) ||
+ Current.Previous->Tok.isLiteral())) {
+ Current.Type = TT_JsNonNullAssertion;
+ return;
+ }
+ if (Current.Next &&
+ Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
+ Current.Type = TT_JsNonNullAssertion;
+ return;
+ }
+ }
+ }
+
// Line.MightBeFunctionDecl can only be true after the parentheses of a
// function declaration have been found. In this case, 'Current' is a
// trailing token of this declaration and thus cannot be a name.
@@ -1063,7 +1106,8 @@ private:
if (Current.MatchingParen && Current.Next &&
!Current.Next->isBinaryOperator() &&
!Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
- tok::period, tok::arrow, tok::coloncolon))
+ tok::comma, tok::period, tok::arrow,
+ tok::coloncolon))
if (FormatToken *AfterParen = Current.MatchingParen->Next) {
// Make sure this isn't the return type of an Obj-C block declaration
if (AfterParen->Tok.isNot(tok::caret)) {
@@ -1076,21 +1120,17 @@ private:
}
}
} else if (Current.is(tok::at) && Current.Next) {
- if (Current.Next->isStringLiteral()) {
- Current.Type = TT_ObjCStringLiteral;
- } else {
- switch (Current.Next->Tok.getObjCKeywordID()) {
- case tok::objc_interface:
- case tok::objc_implementation:
- case tok::objc_protocol:
- Current.Type = TT_ObjCDecl;
- break;
- case tok::objc_property:
- Current.Type = TT_ObjCProperty;
- break;
- default:
- break;
- }
+ switch (Current.Next->Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
}
} else if (Current.is(tok::period)) {
FormatToken *PreviousNoComment = Current.getPreviousNonComment();
@@ -1137,16 +1177,17 @@ private:
if (Tok.isNot(tok::identifier) || !Tok.Previous)
return false;
- if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
+ if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
+ Keywords.kw_as))
return false;
if (Style.Language == FormatStyle::LK_JavaScript &&
Tok.Previous->is(Keywords.kw_in))
return false;
// Skip "const" as it does not have an influence on whether this is a name.
- FormatToken *PreviousNotConst = Tok.Previous;
+ FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
- PreviousNotConst = PreviousNotConst->Previous;
+ PreviousNotConst = PreviousNotConst->getPreviousNonComment();
if (!PreviousNotConst)
return false;
@@ -1175,9 +1216,7 @@ private:
/// \brief Determine whether ')' is ending a cast.
bool rParenEndsCast(const FormatToken &Tok) {
// C-style casts are only used in C++ and Java.
- if (Style.Language != FormatStyle::LK_Cpp &&
- Style.Language != FormatStyle::LK_ObjC &&
- Style.Language != FormatStyle::LK_Java)
+ if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
return false;
// Empty parens aren't casts and there are no casts at the end of the line.
@@ -1282,7 +1321,8 @@ private:
return TT_UnaryOperator;
const FormatToken *NextToken = Tok.getNextNonComment();
- if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) ||
+ if (!NextToken ||
+ NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
(NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
return TT_PointerOrReference;
@@ -1445,7 +1485,9 @@ public:
// At the end of the line or when an operator with higher precedence is
// found, insert fake parenthesis and return.
- if (!Current || (Current->closesScope() && Current->MatchingParen) ||
+ if (!Current ||
+ (Current->closesScope() &&
+ (Current->MatchingParen || Current->is(TT_TemplateString))) ||
(CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
(CurrentPrecedence == prec::Conditional &&
Precedence == prec::Assignment && Current->is(tok::colon))) {
@@ -1454,7 +1496,9 @@ public:
// Consume scopes: (), [], <> and {}
if (Current->opensScope()) {
- while (Current && !Current->closesScope()) {
+ // In fragment of a JavaScript template string can look like '}..${' and
+ // thus close a scope and open a new one at the same time.
+ while (Current && (!Current->closesScope() || Current->opensScope())) {
next();
parse();
}
@@ -1493,13 +1537,14 @@ private:
return prec::Conditional;
if (NextNonComment && NextNonComment->is(tok::colon) &&
NextNonComment->is(TT_DictLiteral))
- return prec::Comma;
+ return prec::Assignment;
+ if (Current->is(TT_JsComputedPropertyName))
+ return prec::Assignment;
if (Current->is(TT_LambdaArrow))
return prec::Comma;
if (Current->is(TT_JsFatArrow))
return prec::Assignment;
- if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
- TT_JsComputedPropertyName) ||
+ if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
(Current->is(tok::comment) && NextNonComment &&
NextNonComment->is(TT_SelectorName)))
return 0;
@@ -1510,7 +1555,7 @@ private:
Current->is(Keywords.kw_instanceof))
return prec::Relational;
if (Style.Language == FormatStyle::LK_JavaScript &&
- Current->is(Keywords.kw_in))
+ Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
return prec::Relational;
if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
return Current->getPrecedence();
@@ -1594,8 +1639,14 @@ void TokenAnnotator::setCommentLineLevels(
for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
E = Lines.rend();
I != E; ++I) {
- if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
- (*I)->First->Next == nullptr)
+ bool CommentLine = (*I)->First;
+ for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
+ if (!Tok->is(tok::comment)) {
+ CommentLine = false;
+ break;
+ }
+ }
+ if (NextNonCommentLine && CommentLine)
(*I)->Level = NextNonCommentLine->Level;
else
NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
@@ -1697,7 +1748,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
}
}
- // Check whether parameter list can be long to a function declaration.
+ // Check whether parameter list can belong to a function declaration.
if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
return false;
// If the lines ends with "{", this is likely an function definition.
@@ -1711,6 +1762,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
return true;
for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
+ if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
+ Tok = Tok->MatchingParen;
+ continue;
+ }
if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
return true;
@@ -1753,8 +1808,6 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
Line.First->TotalLength =
Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
- if (!Line.First->Next)
- return;
FormatToken *Current = Line.First->Next;
bool InFunctionDecl = Line.MightBeFunctionDecl;
while (Current) {
@@ -1830,9 +1883,18 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
}
calculateUnbreakableTailLengths(Line);
+ unsigned IndentLevel = Line.Level;
for (Current = Line.First; Current != nullptr; Current = Current->Next) {
if (Current->Role)
Current->Role->precomputeFormattingInfos(Current);
+ if (Current->MatchingParen &&
+ Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
+ assert(IndentLevel > 0);
+ --IndentLevel;
+ }
+ Current->IndentLevel = IndentLevel;
+ if (Current->opensBlockOrBlockTypeList(Style))
+ ++IndentLevel;
}
DEBUG({ printDebugInfo(Line); });
@@ -1910,7 +1972,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Right.is(TT_LambdaArrow))
return 110;
if (Left.is(tok::equal) && Right.is(tok::l_brace))
- return 150;
+ return 160;
if (Left.is(TT_CastRParen))
return 100;
if (Left.is(tok::coloncolon) ||
@@ -2167,7 +2229,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
const FormatToken &Left = *Right.Previous;
if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
return true; // Never ever merge two identifiers.
- if (Style.Language == FormatStyle::LK_Cpp) {
+ if (Style.isCpp()) {
if (Left.is(tok::kw_operator))
return Right.is(tok::coloncolon);
} else if (Style.Language == FormatStyle::LK_Proto) {
@@ -2181,6 +2243,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
+ if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
+ Right.MatchingParen) {
+ const FormatToken *Next = Right.MatchingParen->getNextNonComment();
+ // An async arrow function, for example: `x = async () => foo();`,
+ // as opposed to calling a function called async: `x = async();`
+ if (Next && Next->is(TT_JsFatArrow))
+ return true;
+ }
if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return false;
@@ -2196,8 +2266,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
Left.Tok.getIdentifierInfo())
return false;
- if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
- Keywords.kw_of, tok::kw_const) &&
+ if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
+ tok::kw_const) ||
+ // "of" is only a keyword if it appears after another identifier
+ // (e.g. as "const x of y" in a for loop).
+ (Left.is(Keywords.kw_of) && Left.Previous &&
+ Left.Previous->Tok.getIdentifierInfo())) &&
(!Left.Previous || !Left.Previous->is(tok::period)))
return true;
if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
@@ -2227,12 +2301,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// locations that should have whitespace following are identified by the
// above set of follower tokens.
return false;
- // Postfix non-null assertion operator, as in `foo!.bar()`.
- if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren,
- tok::r_square, tok::r_brace) ||
- Left.Tok.isLiteral()))
+ if (Right.is(TT_JsNonNullAssertion))
return false;
- if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as))
+ if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))
return true; // "x! as string"
} else if (Style.Language == FormatStyle::LK_Java) {
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
@@ -2302,12 +2373,16 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (!Style.SpaceBeforeAssignmentOperators &&
Right.getPrecedence() == prec::Assignment)
return false;
+ if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
+ // Generally don't remove existing spaces between an identifier and "::".
+ // The identifier might actually be a macro name such as ALWAYS_INLINE. If
+ // this turns out to be too lenient, add analysis of the identifier itself.
+ return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
return (Left.is(TT_TemplateOpener) &&
Style.Standard == FormatStyle::LS_Cpp03) ||
- !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren,
- tok::l_square) ||
- Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
+ !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
+ tok::kw___super, TT_TemplateCloser, TT_TemplateOpener));
if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
return Style.SpacesInAngles;
if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
@@ -2375,6 +2450,11 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
Right.Next->is(tok::string_literal))
return true;
+ } else if (Style.Language == FormatStyle::LK_Cpp ||
+ Style.Language == FormatStyle::LK_ObjC ||
+ Style.Language == FormatStyle::LK_Proto) {
+ if (Left.isStringLiteral() && Right.isStringLiteral())
+ return true;
}
// If the last token before a '}' is a comma or a trailing comment, the
@@ -2398,9 +2478,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
(Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
if (Left.isTrailingComment())
return true;
- if (Left.isStringLiteral() &&
- (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
- return true;
if (Right.Previous->IsUnterminatedLiteral)
return true;
if (Right.is(tok::lessless) && Right.Next &&
@@ -2416,6 +2493,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
Style.BreakConstructorInitializersBeforeComma &&
!Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
return true;
+ // Break only if we have multiple inheritance.
+ if (Style.BreakBeforeInheritanceComma &&
+ Right.is(TT_InheritanceComma))
+ return true;
if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
// Raw string literals are special wrt. line breaks. The author has made a
// deliberate choice and might have aligned the contents of the string
@@ -2458,11 +2539,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return true;
} else if (Style.Language == FormatStyle::LK_JavaScript) {
const FormatToken *NonComment = Right.getPreviousNonComment();
- if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
- tok::kw_throw) ||
- (NonComment &&
- NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
- tok::kw_throw)))
+ if (NonComment &&
+ NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
+ tok::kw_throw, Keywords.kw_interface,
+ Keywords.kw_type))
return false; // Otherwise a semicolon is inserted.
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
return false;
@@ -2476,6 +2556,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
if (Right.is(Keywords.kw_as))
return false; // must not break before as in 'x as type' casts
+ if (Left.is(Keywords.kw_as))
+ return true;
+ if (Left.is(TT_JsNonNullAssertion))
+ return true;
if (Left.is(Keywords.kw_declare) &&
Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
Keywords.kw_function, tok::kw_class, tok::kw_enum,
@@ -2485,9 +2569,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
// https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
return false;
if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
- Right.isOneOf(tok::identifier, tok::string_literal)) {
+ Right.isOneOf(tok::identifier, tok::string_literal))
return false; // must not break in "module foo { ...}"
- }
+ if (Right.is(TT_TemplateString) && Right.closesScope())
+ return false;
+ if (Left.is(TT_TemplateString) && Left.opensScope())
+ return true;
}
if (Left.is(tok::at))
@@ -2590,6 +2677,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
if (Right.is(TT_CtorInitializerComma) &&
Style.BreakConstructorInitializersBeforeComma)
return true;
+ if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
+ return false;
+ if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
+ return true;
if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
(Left.is(tok::less) && Right.is(tok::less)))
return false;
@@ -2615,7 +2706,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
tok::colon, tok::l_square, tok::at) ||
(Left.is(tok::r_paren) &&
Right.isOneOf(tok::identifier, tok::kw_const)) ||
- (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
+ (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
+ (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
}
void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
@@ -2627,6 +2719,7 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
<< " T=" << getTokenTypeName(Tok->Type)
<< " S=" << Tok->SpacesRequiredBefore
<< " B=" << Tok->BlockParameterCount
+ << " BK=" << Tok->BlockKind
<< " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
<< " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
<< " FakeLParens=";
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index 97daaf44ba99e..805509533bf93 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -39,6 +39,7 @@ class AnnotatedLine {
public:
AnnotatedLine(const UnwrappedLine &Line)
: First(Line.Tokens.front().Tok), Level(Line.Level),
+ MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
InPPDirective(Line.InPPDirective),
MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
IsMultiVariableDeclStmt(false), Affected(false),
@@ -109,6 +110,7 @@ public:
LineType Type;
unsigned Level;
+ size_t MatchingOpeningBlockLineIndex;
bool InPPDirective;
bool MustBeDeclaration;
bool MightBeFunctionDecl;
@@ -122,7 +124,7 @@ public:
/// input ranges.
bool LeadingEmptyLinesAffected;
- /// \c True if a one of this line's children intersects with an input range.
+ /// \c True if one of this line's children intersects with an input range.
bool ChildrenAffected;
private:
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index d7f1c4232d860..c3c154afeb8a3 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -530,34 +530,33 @@ protected:
if (Previous.Children[0]->First->MustBreakBefore)
return false;
- // Cannot merge multiple statements into a single line.
- if (Previous.Children.size() > 1)
- return false;
-
// Cannot merge into one line if this line ends on a comment.
if (Previous.is(tok::comment))
return false;
+ // Cannot merge multiple statements into a single line.
+ if (Previous.Children.size() > 1)
+ return false;
+
+ const AnnotatedLine *Child = Previous.Children[0];
// We can't put the closing "}" on a line with a trailing comment.
- if (Previous.Children[0]->Last->isTrailingComment())
+ if (Child->Last->isTrailingComment())
return false;
// If the child line exceeds the column limit, we wouldn't want to merge it.
// We add +2 for the trailing " }".
if (Style.ColumnLimit > 0 &&
- Previous.Children[0]->Last->TotalLength + State.Column + 2 >
- Style.ColumnLimit)
+ Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit)
return false;
if (!DryRun) {
Whitespaces->replaceWhitespace(
- *Previous.Children[0]->First,
- /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
+ *Child->First, /*Newlines=*/0, /*Spaces=*/1,
/*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
}
- Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
+ Penalty += formatLine(*Child, State.Column + 1, DryRun);
- State.Column += 1 + Previous.Children[0]->Last->TotalLength;
+ State.Column += 1 + Child->Last->TotalLength;
return true;
}
@@ -841,8 +840,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
if (ShouldFormat && TheLine.Type != LT_Invalid) {
if (!DryRun)
- formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
- TheLine.InPPDirective);
+ formatFirstToken(TheLine, PreviousLine, Indent);
NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
@@ -882,9 +880,8 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
TheLine.LeadingEmptyLinesAffected);
// Format the first token.
if (ReformatLeadingWhitespace)
- formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
- TheLine.First->OriginalColumn,
- TheLine.InPPDirective);
+ formatFirstToken(TheLine, PreviousLine,
+ TheLine.First->OriginalColumn);
else
Whitespaces->addUntouchableToken(*TheLine.First,
TheLine.InPPDirective);
@@ -904,15 +901,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
return Penalty;
}
-void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
+void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
const AnnotatedLine *PreviousLine,
- unsigned IndentLevel,
- unsigned Indent,
- bool InPPDirective) {
+ unsigned Indent) {
+ FormatToken& RootToken = *Line.First;
if (RootToken.is(tok::eof)) {
unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
- Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
- /*Spaces=*/0, /*TargetColumn=*/0);
+ Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/0);
return;
}
unsigned Newlines =
@@ -944,9 +940,9 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
(!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
Newlines = std::min(1u, Newlines);
- Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
- Indent, InPPDirective &&
- !RootToken.HasUnescapedNewline);
+ Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
+ Line.InPPDirective &&
+ !RootToken.HasUnescapedNewline);
}
unsigned
diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 7bcead9d25e1a..93247f71d6e0f 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h
@@ -44,9 +44,8 @@ public:
private:
/// \brief Add a new line and the required indent before the first Token
/// of the \c UnwrappedLine if there was no structural parsing error.
- void formatFirstToken(FormatToken &RootToken,
- const AnnotatedLine *PreviousLine, unsigned IndentLevel,
- unsigned Indent, bool InPPDirective);
+ void formatFirstToken(const AnnotatedLine &Line,
+ const AnnotatedLine *PreviousLine, unsigned Indent);
/// \brief Returns the column limit for a line, taking into account whether we
/// need an escaped newline due to a continued preprocessor directive.
@@ -57,7 +56,8 @@ private:
// starting from a specific additional offset. Improves performance if there
// are many nested blocks.
std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
- unsigned> PenaltyCache;
+ unsigned>
+ PenaltyCache;
ContinuationIndenter *Indenter;
WhitespaceManager *Whitespaces;
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 8fc3b78aee010..5be68ad5c6b82 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -202,7 +202,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
- CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+ CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+ CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
void UnwrappedLineParser::reset() {
@@ -334,8 +335,11 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
case tok::l_brace:
if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
PrevTok->is(tok::colon))
- // In TypeScript's TypeMemberLists, there can be semicolons between the
- // individual members.
+ // A colon indicates this code is in a type, or a braced list following
+ // a label in an object literal ({a: {b: 1}}).
+ // The code below could be confused by semicolons between the individual
+ // members in a type member list, which would normally trigger BK_Block.
+ // In both cases, this must be parsed as an inline braced init.
Tok->BlockKind = BK_BracedInit;
else
Tok->BlockKind = BK_Unknown;
@@ -424,6 +428,8 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
parseParens();
addUnwrappedLine();
+ size_t OpeningLineIndex =
+ Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
@@ -449,6 +455,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
if (MunchSemi && FormatTok->Tok.is(tok::semi))
nextToken();
Line->Level = InitialLevel;
+ Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
}
static bool isGoogScope(const UnwrappedLine &Line) {
@@ -582,13 +589,14 @@ void UnwrappedLineParser::conditionalCompilationEnd() {
}
void UnwrappedLineParser::parsePPIf(bool IfDef) {
+ bool IfNDef = FormatTok->is(tok::pp_ifndef);
nextToken();
- bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
- FormatTok->Tok.getLiteralData() != nullptr &&
- StringRef(FormatTok->Tok.getLiteralData(),
- FormatTok->Tok.getLength()) == "0") ||
- FormatTok->Tok.is(tok::kw_false);
- conditionalCompilationStart(!IfDef && IsLiteralFalse);
+ bool Unreachable = false;
+ if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
+ Unreachable = true;
+ if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
+ Unreachable = true;
+ conditionalCompilationStart(Unreachable);
parsePPUnknown();
}
@@ -746,8 +754,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
return addUnwrappedLine();
- if ((PreviousMustBeValue || Previous->is(tok::r_brace)) &&
- isJSDeclOrStmt(Keywords, Next))
+ if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}
@@ -909,7 +916,8 @@ void UnwrappedLineParser::parseStructuralElement() {
return;
}
}
- if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
+ if (Style.isCpp() &&
+ FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
Keywords.kw_slots, Keywords.kw_qslots)) {
nextToken();
if (FormatTok->is(tok::colon)) {
@@ -943,7 +951,7 @@ void UnwrappedLineParser::parseStructuralElement() {
if (!parseEnum())
break;
// This only applies for C++.
- if (Style.Language != FormatStyle::LK_Cpp) {
+ if (!Style.isCpp()) {
addUnwrappedLine();
return;
}
@@ -1124,7 +1132,7 @@ void UnwrappedLineParser::parseStructuralElement() {
}
bool UnwrappedLineParser::tryToParseLambda() {
- if (Style.Language != FormatStyle::LK_Cpp) {
+ if (!Style.isCpp()) {
nextToken();
return false;
}
@@ -1298,6 +1306,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
continue;
}
}
+ if (FormatTok->is(tok::l_brace)) {
+ // Could be a method inside of a braced list `{a() { return 1; }}`.
+ if (tryToParseBracedList())
+ continue;
+ parseChildBlock();
+ }
}
switch (FormatTok->Tok.getKind()) {
case tok::caret:
@@ -1309,12 +1323,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
case tok::l_square:
tryToParseLambda();
break;
- case tok::l_brace:
- // Assume there are no blocks inside a braced init list apart
- // from the ones we explicitly parse out (like lambdas).
- FormatTok->BlockKind = BK_BracedInit;
- parseBracedList();
- break;
case tok::l_paren:
parseParens();
// JavaScript can just have free standing methods and getters/setters in
@@ -1325,6 +1333,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
break;
}
break;
+ case tok::l_brace:
+ // Assume there are no blocks inside a braced init list apart
+ // from the ones we explicitly parse out (like lambdas).
+ FormatTok->BlockKind = BK_BracedInit;
+ parseBracedList();
+ break;
case tok::r_brace:
nextToken();
return !HasError;
@@ -1381,6 +1395,12 @@ void UnwrappedLineParser::parseParens() {
if (FormatTok->Tok.is(tok::l_brace))
parseBracedList();
break;
+ case tok::kw_class:
+ if (Style.Language == FormatStyle::LK_JavaScript)
+ parseRecord(/*ParseAsExpr=*/true);
+ else
+ nextToken();
+ break;
case tok::identifier:
if (Style.Language == FormatStyle::LK_JavaScript &&
(FormatTok->is(Keywords.kw_function) ||
@@ -1722,8 +1742,7 @@ bool UnwrappedLineParser::parseEnum() {
nextToken();
// If there are two identifiers in a row, this is likely an elaborate
// return type. In Java, this can be "implements", etc.
- if (Style.Language == FormatStyle::LK_Cpp &&
- FormatTok->is(tok::identifier))
+ if (Style.isCpp() && FormatTok->is(tok::identifier))
return false;
}
}
@@ -1819,7 +1838,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
addUnwrappedLine();
}
-void UnwrappedLineParser::parseRecord() {
+void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
const FormatToken &InitialToken = *FormatTok;
nextToken();
@@ -1863,11 +1882,15 @@ void UnwrappedLineParser::parseRecord() {
}
}
if (FormatTok->Tok.is(tok::l_brace)) {
- if (ShouldBreakBeforeBrace(Style, InitialToken))
- addUnwrappedLine();
+ if (ParseAsExpr) {
+ parseChildBlock();
+ } else {
+ if (ShouldBreakBeforeBrace(Style, InitialToken))
+ addUnwrappedLine();
- parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
- /*MunchSemi=*/false);
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
+ /*MunchSemi=*/false);
+ }
}
// There is no addUnwrappedLine() here so that we fall through to parsing a
// structural element afterwards. Thus, in "class A {} n, m;",
@@ -1999,7 +2022,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
I != E; ++I) {
- llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
+ llvm::dbgs() << I->Tok->Tok.getName() << "["
+ << "T=" << I->Tok->Type
+ << ", OC=" << I->Tok->OriginalColumn << "] ";
}
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
@@ -2024,6 +2049,7 @@ void UnwrappedLineParser::addUnwrappedLine() {
});
CurrentLines->push_back(std::move(*Line));
Line->Tokens.clear();
+ Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
@@ -2039,13 +2065,139 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
FormatTok.NewlinesBefore > 0;
}
+static bool isLineComment(const FormatToken &FormatTok) {
+ return FormatTok.is(tok::comment) &&
+ FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// section on \p Line.
+static bool continuesLineComment(const FormatToken &FormatTok,
+ const UnwrappedLine &Line,
+ llvm::Regex &CommentPragmasRegex) {
+ if (Line.Tokens.empty())
+ return false;
+
+ StringRef IndentContent = FormatTok.TokenText;
+ if (FormatTok.TokenText.startswith("//") ||
+ FormatTok.TokenText.startswith("/*"))
+ IndentContent = FormatTok.TokenText.substr(2);
+ if (CommentPragmasRegex.match(IndentContent))
+ return false;
+
+ // If Line starts with a line comment, then FormatTok continues the comment
+ // section if its original column is greater or equal to the original start
+ // column of the line.
+ //
+ // Define the min column token of a line as follows: if a line ends in '{' or
+ // contains a '{' followed by a line comment, then the min column token is
+ // that '{'. Otherwise, the min column token of the line is the first token of
+ // the line.
+ //
+ // If Line starts with a token other than a line comment, then FormatTok
+ // continues the comment section if its original column is greater than the
+ // original start column of the min column token of the line.
+ //
+ // For example, the second line comment continues the first in these cases:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // int i; // first line
+ // // second line
+ //
+ // and:
+ //
+ // do { // first line
+ // // second line
+ // int i;
+ // } while (true);
+ //
+ // and:
+ //
+ // enum {
+ // a, // first line
+ // // second line
+ // b
+ // };
+ //
+ // The second line comment doesn't continue the first in these cases:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // int i; // first line
+ // // second line
+ //
+ // and:
+ //
+ // do { // first line
+ // // second line
+ // int i;
+ // } while (true);
+ //
+ // and:
+ //
+ // enum {
+ // a, // first line
+ // // second line
+ // };
+ const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
+
+ // Scan for '{//'. If found, use the column of '{' as a min column for line
+ // comment section continuation.
+ const FormatToken *PreviousToken = nullptr;
+ for (const UnwrappedLineNode &Node : Line.Tokens) {
+ if (PreviousToken && PreviousToken->is(tok::l_brace) &&
+ isLineComment(*Node.Tok)) {
+ MinColumnToken = PreviousToken;
+ break;
+ }
+ PreviousToken = Node.Tok;
+
+ // Grab the last newline preceding a token in this unwrapped line.
+ if (Node.Tok->NewlinesBefore > 0) {
+ MinColumnToken = Node.Tok;
+ }
+ }
+ if (PreviousToken && PreviousToken->is(tok::l_brace)) {
+ MinColumnToken = PreviousToken;
+ }
+
+ unsigned MinContinueColumn =
+ MinColumnToken->OriginalColumn +
+ (isLineComment(*MinColumnToken) ? 0 : 1);
+ return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+ isLineComment(*(Line.Tokens.back().Tok)) &&
+ FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
bool JustComments = Line->Tokens.empty();
for (SmallVectorImpl<FormatToken *>::const_iterator
I = CommentsBeforeNextToken.begin(),
E = CommentsBeforeNextToken.end();
I != E; ++I) {
- if (isOnNewLine(**I) && JustComments)
+ // Line comments that belong to the same line comment section are put on the
+ // same line since later we might want to reflow content between them.
+ // Additional fine-grained breaking of line comment sections is controlled
+ // by the class BreakableLineCommentSection in case it is desirable to keep
+ // several line comment sections in the same unwrapped line.
+ //
+ // FIXME: Consider putting separate line comment sections as children to the
+ // unwrapped line instead.
+ (*I)->ContinuesLineCommentSection =
+ continuesLineComment(**I, *Line, CommentPragmasRegex);
+ if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
addUnwrappedLine();
pushToken(*I);
}
@@ -2073,13 +2225,71 @@ const FormatToken *UnwrappedLineParser::getPreviousToken() {
return Line->Tokens.back().Tok;
}
+void UnwrappedLineParser::distributeComments(
+ const SmallVectorImpl<FormatToken *> &Comments,
+ const FormatToken *NextTok) {
+ // Whether or not a line comment token continues a line is controlled by
+ // the method continuesLineComment, with the following caveat:
+ //
+ // Define a trail of Comments to be a nonempty proper postfix of Comments such
+ // that each comment line from the trail is aligned with the next token, if
+ // the next token exists. If a trail exists, the beginning of the maximal
+ // trail is marked as a start of a new comment section.
+ //
+ // For example in this code:
+ //
+ // int a; // line about a
+ // // line 1 about b
+ // // line 2 about b
+ // int b;
+ //
+ // the two lines about b form a maximal trail, so there are two sections, the
+ // first one consisting of the single comment "// line about a" and the
+ // second one consisting of the next two comments.
+ if (Comments.empty())
+ return;
+ bool ShouldPushCommentsInCurrentLine = true;
+ bool HasTrailAlignedWithNextToken = false;
+ unsigned StartOfTrailAlignedWithNextToken = 0;
+ if (NextTok) {
+ // We are skipping the first element intentionally.
+ for (unsigned i = Comments.size() - 1; i > 0; --i) {
+ if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
+ HasTrailAlignedWithNextToken = true;
+ StartOfTrailAlignedWithNextToken = i;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
+ FormatToken *FormatTok = Comments[i];
+ if (HasTrailAlignedWithNextToken &&
+ i == StartOfTrailAlignedWithNextToken) {
+ FormatTok->ContinuesLineCommentSection = false;
+ } else {
+ FormatTok->ContinuesLineCommentSection =
+ continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
+ }
+ if (!FormatTok->ContinuesLineCommentSection &&
+ (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
+ ShouldPushCommentsInCurrentLine = false;
+ }
+ if (ShouldPushCommentsInCurrentLine) {
+ pushToken(FormatTok);
+ } else {
+ CommentsBeforeNextToken.push_back(FormatTok);
+ }
+ }
+}
+
void UnwrappedLineParser::readToken() {
- bool CommentsInCurrentLine = true;
+ SmallVector<FormatToken *, 1> Comments;
do {
FormatTok = Tokens->getNextToken();
assert(FormatTok);
while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
(FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
// If there is an unfinished unwrapped line, we flush the preprocessor
// directives only after that unwrapped line was finished later.
bool SwitchToPreprocessorLines = !Line->Tokens.empty();
@@ -2109,17 +2319,17 @@ void UnwrappedLineParser::readToken() {
continue;
}
- if (!FormatTok->Tok.is(tok::comment))
+ if (!FormatTok->Tok.is(tok::comment)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
return;
- if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
- CommentsInCurrentLine = false;
- }
- if (CommentsInCurrentLine) {
- pushToken(FormatTok);
- } else {
- CommentsBeforeNextToken.push_back(FormatTok);
}
+
+ Comments.push_back(FormatTok);
} while (!eof());
+
+ distributeComments(Comments, nullptr);
+ Comments.clear();
}
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 9c78d33632c6f..15d1d9cda7a28 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -19,6 +19,7 @@
#include "FormatToken.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
#include <list>
#include <stack>
@@ -47,6 +48,14 @@ struct UnwrappedLine {
bool InPPDirective;
bool MustBeDeclaration;
+
+ /// \brief If this \c UnwrappedLine closes a block in a sequence of lines,
+ /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
+ /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
+ /// \c kInvalidIndex.
+ size_t MatchingOpeningBlockLineIndex;
+
+ static const size_t kInvalidIndex = -1;
};
class UnwrappedLineConsumer {
@@ -99,7 +108,10 @@ private:
void parseAccessSpecifier();
bool parseEnum();
void parseJavaEnumBody();
- void parseRecord();
+ // Parses a record (aka class) as a top level element. If ParseAsExpr is true,
+ // parses the record as a child block, i.e. if the class declaration is an
+ // expression.
+ void parseRecord(bool ParseAsExpr = false);
void parseObjCProtocolList();
void parseObjCUntilAtEnd();
void parseObjCInterfaceOrImplementation();
@@ -113,6 +125,21 @@ private:
void nextToken();
const FormatToken *getPreviousToken();
void readToken();
+
+ // Decides which comment tokens should be added to the current line and which
+ // should be added as comments before the next token.
+ //
+ // Comments specifies the sequence of comment tokens to analyze. They get
+ // either pushed to the current line or added to the comments before the next
+ // token.
+ //
+ // NextTok specifies the next token. A null pointer NextTok is supported, and
+ // signifies either the absense of a next token, or that the next token
+ // shouldn't be taken into accunt for the analysis.
+ void distributeComments(const SmallVectorImpl<FormatToken *> &Comments,
+ const FormatToken *NextTok);
+
+ // Adds the comment preceding the next token to unwrapped lines.
void flushComments(bool NewlineBeforeNext);
void pushToken(FormatToken *Tok);
void calculateBraceTypes(bool ExpectClassBody = false);
@@ -162,6 +189,8 @@ private:
const FormatStyle &Style;
const AdditionalKeywords &Keywords;
+ llvm::Regex CommentPragmasRegex;
+
FormatTokenSource *Tokens;
UnwrappedLineConsumer &Callback;
@@ -213,8 +242,8 @@ struct UnwrappedLineNode {
SmallVector<UnwrappedLine, 0> Children;
};
-inline UnwrappedLine::UnwrappedLine()
- : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
+inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false),
+ MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {}
} // end namespace format
} // end namespace clang
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index b64506f39035f..2c1f59324971f 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -25,64 +25,60 @@ operator()(const Change &C1, const Change &C2) const {
C2.OriginalWhitespaceRange.getBegin());
}
-WhitespaceManager::Change::Change(
- bool CreateReplacement, SourceRange OriginalWhitespaceRange,
- unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
- unsigned NewlinesBefore, StringRef PreviousLinePostfix,
- StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective,
- bool IsStartOfDeclName, bool IsInsideToken)
- : CreateReplacement(CreateReplacement),
+WhitespaceManager::Change::Change(const FormatToken &Tok,
+ bool CreateReplacement,
+ SourceRange OriginalWhitespaceRange,
+ int Spaces, unsigned StartOfTokenColumn,
+ unsigned NewlinesBefore,
+ StringRef PreviousLinePostfix,
+ StringRef CurrentLinePrefix,
+ bool ContinuesPPDirective, bool IsInsideToken)
+ : Tok(&Tok), CreateReplacement(CreateReplacement),
OriginalWhitespaceRange(OriginalWhitespaceRange),
StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
PreviousLinePostfix(PreviousLinePostfix),
- CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
- ContinuesPPDirective(ContinuesPPDirective),
- IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel),
- Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false),
- TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
+ CurrentLinePrefix(CurrentLinePrefix),
+ ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
+ IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
+ PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
StartOfBlockComment(nullptr), IndentationOffset(0) {}
void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
- unsigned IndentLevel, unsigned Spaces,
+ unsigned Spaces,
unsigned StartOfTokenColumn,
bool InPPDirective) {
if (Tok.Finalized)
return;
Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
- Changes.push_back(
- Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel,
- Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(),
- InPPDirective && !Tok.IsFirst,
- Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
- /*IsInsideToken=*/false));
+ Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
+ Spaces, StartOfTokenColumn, Newlines, "", "",
+ InPPDirective && !Tok.IsFirst,
+ /*IsInsideToken=*/false));
}
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
if (Tok.Finalized)
return;
- Changes.push_back(Change(
- /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0,
- /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
- Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
- Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
- /*IsInsideToken=*/false));
+ Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
+ Tok.WhitespaceRange, /*Spaces=*/0,
+ Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
+ InPPDirective && !Tok.IsFirst,
+ /*IsInsideToken=*/false));
}
void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
- unsigned Newlines, unsigned IndentLevel, int Spaces) {
+ unsigned Newlines, int Spaces) {
if (Tok.Finalized)
return;
SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
- Changes.push_back(Change(
- true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
- IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
- CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown,
- InPPDirective && !Tok.IsFirst,
- Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
- /*IsInsideToken=*/Newlines == 0));
+ Changes.push_back(
+ Change(Tok, /*CreateReplacement=*/true,
+ SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
+ std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
+ InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
}
const tooling::Replacements &WhitespaceManager::generateReplacements() {
@@ -125,30 +121,64 @@ void WhitespaceManager::calculateLineBreakInformation() {
Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
Changes[i - 1].IsTrailingComment =
- (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
- (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
- Changes[i - 1].Kind == tok::comment;
+ (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
+ (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
+ Changes[i - 1].Tok->is(tok::comment) &&
+ // FIXME: This is a dirty hack. The problem is that
+ // BreakableLineCommentSection does comment reflow changes and here is
+ // the aligning of trailing comments. Consider the case where we reflow
+ // the second line up in this example:
+ //
+ // // line 1
+ // // line 2
+ //
+ // That amounts to 2 changes by BreakableLineCommentSection:
+ // - the first, delimited by (), for the whitespace between the tokens,
+ // - and second, delimited by [], for the whitespace at the beginning
+ // of the second token:
+ //
+ // // line 1(
+ // )[// ]line 2
+ //
+ // So in the end we have two changes like this:
+ //
+ // // line1()[ ]line 2
+ //
+ // Note that the OriginalWhitespaceStart of the second change is the
+ // same as the PreviousOriginalWhitespaceEnd of the first change.
+ // In this case, the below check ensures that the second change doesn't
+ // get treated as a trailing comment change here, since this might
+ // trigger additional whitespace to be wrongly inserted before "line 2"
+ // by the comment aligner here.
+ //
+ // For a proper solution we need a mechanism to say to WhitespaceManager
+ // that a particular change breaks the current sequence of trailing
+ // comments.
+ OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
}
// FIXME: The last token is currently not always an eof token; in those
// cases, setting TokenLength of the last token to 0 is wrong.
Changes.back().TokenLength = 0;
- Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
+ Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
const WhitespaceManager::Change *LastBlockComment = nullptr;
for (auto &Change : Changes) {
// Reset the IsTrailingComment flag for changes inside of trailing comments
- // so they don't get realigned later.
- if (Change.IsInsideToken)
+ // so they don't get realigned later. Comment line breaks however still need
+ // to be aligned.
+ if (Change.IsInsideToken && Change.NewlinesBefore == 0)
Change.IsTrailingComment = false;
Change.StartOfBlockComment = nullptr;
Change.IndentationOffset = 0;
- if (Change.Kind == tok::comment) {
- LastBlockComment = &Change;
- } else if (Change.Kind == tok::unknown) {
- if ((Change.StartOfBlockComment = LastBlockComment))
- Change.IndentationOffset =
- Change.StartOfTokenColumn -
- Change.StartOfBlockComment->StartOfTokenColumn;
+ if (Change.Tok->is(tok::comment)) {
+ if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
+ LastBlockComment = &Change;
+ else {
+ if ((Change.StartOfBlockComment = LastBlockComment))
+ Change.IndentationOffset =
+ Change.StartOfTokenColumn -
+ Change.StartOfBlockComment->StartOfTokenColumn;
+ }
} else {
LastBlockComment = nullptr;
}
@@ -162,21 +192,56 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes) {
bool FoundMatchOnLine = false;
int Shift = 0;
+
+ // ScopeStack keeps track of the current scope depth. It contains indices of
+ // the first token on each scope.
+ // We only run the "Matches" function on tokens from the outer-most scope.
+ // However, we do need to pay special attention to one class of tokens
+ // that are not in the outer-most scope, and that is function parameters
+ // which are split across multiple lines, as illustrated by this example:
+ // double a(int x);
+ // int b(int y,
+ // double z);
+ // In the above example, we need to take special care to ensure that
+ // 'double z' is indented along with it's owning function 'b'.
+ SmallVector<unsigned, 16> ScopeStack;
+
for (unsigned i = Start; i != End; ++i) {
- if (Changes[i].NewlinesBefore > 0) {
- FoundMatchOnLine = false;
+ if (ScopeStack.size() != 0 &&
+ Changes[i].nestingAndIndentLevel() <
+ Changes[ScopeStack.back()].nestingAndIndentLevel())
+ ScopeStack.pop_back();
+
+ if (i != Start && Changes[i].nestingAndIndentLevel() >
+ Changes[i - 1].nestingAndIndentLevel())
+ ScopeStack.push_back(i);
+
+ bool InsideNestedScope = ScopeStack.size() != 0;
+
+ if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
Shift = 0;
+ FoundMatchOnLine = false;
}
// If this is the first matching token to be aligned, remember by how many
// spaces it has to be shifted, so the rest of the changes on the line are
// shifted by the same amount
- if (!FoundMatchOnLine && Matches(Changes[i])) {
+ if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
FoundMatchOnLine = true;
Shift = Column - Changes[i].StartOfTokenColumn;
Changes[i].Spaces += Shift;
}
+ // This is for function parameters that are split across multiple lines,
+ // as mentioned in the ScopeStack comment.
+ if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
+ unsigned ScopeStart = ScopeStack.back();
+ if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
+ (ScopeStart > Start + 1 &&
+ Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
+ Changes[i].Spaces += Shift;
+ }
+
assert(Shift >= 0);
Changes[i].StartOfTokenColumn += Shift;
if (i + 1 != Changes.size())
@@ -184,15 +249,37 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
}
}
-// Walk through all of the changes and find sequences of matching tokens to
-// align. To do so, keep track of the lines and whether or not a matching token
-// was found on a line. If a matching token is found, extend the current
-// sequence. If the current line cannot be part of a sequence, e.g. because
-// there is an empty line before it or it contains only non-matching tokens,
-// finalize the previous sequence.
+// Walk through a subset of the changes, starting at StartAt, and find
+// sequences of matching tokens to align. To do so, keep track of the lines and
+// whether or not a matching token was found on a line. If a matching token is
+// found, extend the current sequence. If the current line cannot be part of a
+// sequence, e.g. because there is an empty line before it or it contains only
+// non-matching tokens, finalize the previous sequence.
+// The value returned is the token on which we stopped, either because we
+// exhausted all items inside Changes, or because we hit a scope level higher
+// than our initial scope.
+// This function is recursive. Each invocation processes only the scope level
+// equal to the initial level, which is the level of Changes[StartAt].
+// If we encounter a scope level greater than the initial level, then we call
+// ourselves recursively, thereby avoiding the pollution of the current state
+// with the alignment requirements of the nested sub-level. This recursive
+// behavior is necessary for aligning function prototypes that have one or more
+// arguments.
+// If this function encounters a scope level less than the initial level,
+// it returns the current position.
+// There is a non-obvious subtlety in the recursive behavior: Even though we
+// defer processing of nested levels to recursive invocations of this
+// function, when it comes time to align a sequence of tokens, we run the
+// alignment on the entire sequence, including the nested levels.
+// When doing so, most of the nested tokens are skipped, because their
+// alignment was already handled by the recursive invocations of this function.
+// However, the special exception is that we do NOT skip function parameters
+// that are split across multiple lines. See the test case in FormatTest.cpp
+// that mentions "split function parameter alignment" for an example of this.
template <typename F>
-static void AlignTokens(const FormatStyle &Style, F &&Matches,
- SmallVector<WhitespaceManager::Change, 16> &Changes) {
+static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
+ SmallVector<WhitespaceManager::Change, 16> &Changes,
+ unsigned StartAt) {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
@@ -200,14 +287,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches,
unsigned StartOfSequence = 0;
unsigned EndOfSequence = 0;
- // Keep track of the nesting level of matching tokens, i.e. the number of
- // surrounding (), [], or {}. We will only align a sequence of matching
- // token that share the same scope depth.
- //
- // FIXME: This could use FormatToken::NestingLevel information, but there is
- // an outstanding issue wrt the brace scopes.
- unsigned NestingLevelOfLastMatch = 0;
- unsigned NestingLevel = 0;
+ // Measure the scope level (i.e. depth of (), [], {}) of the first token, and
+ // abort when we hit any token in a higher scope than the starting one.
+ auto NestingAndIndentLevel = StartAt < Changes.size()
+ ? Changes[StartAt].nestingAndIndentLevel()
+ : std::pair<unsigned, unsigned>(0, 0);
// Keep track of the number of commas before the matching tokens, we will only
// align a sequence of matching tokens if they are preceded by the same number
@@ -235,7 +319,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches,
EndOfSequence = 0;
};
- for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ unsigned i = StartAt;
+ for (unsigned e = Changes.size(); i != e; ++i) {
+ if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel)
+ break;
+
if (Changes[i].NewlinesBefore != 0) {
CommasBeforeMatch = 0;
EndOfSequence = i;
@@ -247,33 +335,24 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches,
FoundMatchOnLine = false;
}
- if (Changes[i].Kind == tok::comma) {
+ if (Changes[i].Tok->is(tok::comma)) {
++CommasBeforeMatch;
- } else if (Changes[i].Kind == tok::r_brace ||
- Changes[i].Kind == tok::r_paren ||
- Changes[i].Kind == tok::r_square) {
- --NestingLevel;
- } else if (Changes[i].Kind == tok::l_brace ||
- Changes[i].Kind == tok::l_paren ||
- Changes[i].Kind == tok::l_square) {
- // We want sequences to skip over child scopes if possible, but not the
- // other way around.
- NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel);
- ++NestingLevel;
+ } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) {
+ // Call AlignTokens recursively, skipping over this scope block.
+ unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
+ i = StoppedAt - 1;
+ continue;
}
if (!Matches(Changes[i]))
continue;
// If there is more than one matching token per line, or if the number of
- // preceding commas, or the scope depth, do not match anymore, end the
- // sequence.
- if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch ||
- NestingLevel != NestingLevelOfLastMatch)
+ // preceding commas, do not match anymore, end the sequence.
+ if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
AlignCurrentSequence();
CommasBeforeLastMatch = CommasBeforeMatch;
- NestingLevelOfLastMatch = NestingLevel;
FoundMatchOnLine = true;
if (StartOfSequence == 0)
@@ -296,8 +375,9 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches,
MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
}
- EndOfSequence = Changes.size();
+ EndOfSequence = i;
AlignCurrentSequence();
+ return i;
}
void WhitespaceManager::alignConsecutiveAssignments() {
@@ -314,9 +394,9 @@ void WhitespaceManager::alignConsecutiveAssignments() {
if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
return false;
- return C.Kind == tok::equal;
+ return C.Tok->is(tok::equal);
},
- Changes);
+ Changes, /*StartAt=*/0);
}
void WhitespaceManager::alignConsecutiveDeclarations() {
@@ -329,9 +409,15 @@ void WhitespaceManager::alignConsecutiveDeclarations() {
// const char* const* v1;
// float const* v2;
// SomeVeryLongType const& v3;
-
- AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; },
- Changes);
+ AlignTokens(Style,
+ [](Change const &C) {
+ // tok::kw_operator is necessary for aligning operator overload
+ // definitions.
+ return C.Tok->is(TT_StartOfName) ||
+ C.Tok->is(TT_FunctionDeclarationName) ||
+ C.Tok->is(tok::kw_operator);
+ },
+ Changes, /*StartAt=*/0);
}
void WhitespaceManager::alignTrailingComments() {
@@ -360,17 +446,14 @@ void WhitespaceManager::alignTrailingComments() {
// If this comment follows an } in column 0, it probably documents the
// closing of a namespace and we don't want to align it.
bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
- Changes[i - 1].Kind == tok::r_brace &&
+ Changes[i - 1].Tok->is(tok::r_brace) &&
Changes[i - 1].StartOfTokenColumn == 0;
bool WasAlignedWithStartOfNextLine = false;
if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
Changes[i].OriginalWhitespaceRange.getEnd());
for (unsigned j = i + 1; j != e; ++j) {
- if (Changes[j].Kind == tok::comment ||
- Changes[j].Kind == tok::unknown)
- // Skip over comments and unknown tokens. "unknown tokens are used for
- // the continuation of multiline comments.
+ if (Changes[j].Tok->is(tok::comment))
continue;
unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
@@ -481,7 +564,8 @@ void WhitespaceManager::generateChanges() {
C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
else
appendNewlineText(ReplacementText, C.NewlinesBefore);
- appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
+ appendIndentText(ReplacementText, C.Tok->IndentLevel,
+ std::max(0, C.Spaces),
C.StartOfTokenColumn - std::max(0, C.Spaces));
ReplacementText.append(C.CurrentLinePrefix);
storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index f42e371830b3d..6be4af2622766 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -43,8 +43,7 @@ public:
/// \brief Replaces the whitespace in front of \p Tok. Only call once for
/// each \c AnnotatedToken.
- void replaceWhitespace(FormatToken &Tok, unsigned Newlines,
- unsigned IndentLevel, unsigned Spaces,
+ void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
unsigned StartOfTokenColumn,
bool InPPDirective = false);
@@ -72,8 +71,7 @@ public:
unsigned ReplaceChars,
StringRef PreviousPostfix,
StringRef CurrentPrefix, bool InPPDirective,
- unsigned Newlines, unsigned IndentLevel,
- int Spaces);
+ unsigned Newlines, int Spaces);
/// \brief Returns all the \c Replacements created during formatting.
const tooling::Replacements &generateReplacements();
@@ -91,8 +89,6 @@ public:
const SourceManager &SourceMgr;
};
- Change() {}
-
/// \brief Creates a \c Change.
///
/// The generated \c Change will replace the characters at
@@ -102,12 +98,17 @@ public:
///
/// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
/// trailing comments and escaped newlines.
- Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange,
- unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
- unsigned NewlinesBefore, StringRef PreviousLinePostfix,
- StringRef CurrentLinePrefix, tok::TokenKind Kind,
- bool ContinuesPPDirective, bool IsStartOfDeclName,
- bool IsInsideToken);
+ Change(const FormatToken &Tok, bool CreateReplacement,
+ SourceRange OriginalWhitespaceRange, int Spaces,
+ unsigned StartOfTokenColumn, unsigned NewlinesBefore,
+ StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
+ bool ContinuesPPDirective, bool IsInsideToken);
+
+ // The kind of the token whose whitespace this change replaces, or in which
+ // this change inserts whitespace.
+ // FIXME: Currently this is not set correctly for breaks inside comments, as
+ // the \c BreakableToken is still doing its own alignment.
+ const FormatToken *Tok;
bool CreateReplacement;
// Changes might be in the middle of a token, so we cannot just keep the
@@ -117,18 +118,7 @@ public:
unsigned NewlinesBefore;
std::string PreviousLinePostfix;
std::string CurrentLinePrefix;
- // The kind of the token whose whitespace this change replaces, or in which
- // this change inserts whitespace.
- // FIXME: Currently this is not set correctly for breaks inside comments, as
- // the \c BreakableToken is still doing its own alignment.
- tok::TokenKind Kind;
bool ContinuesPPDirective;
- bool IsStartOfDeclName;
-
- // The number of nested blocks the token is in. This is used to add tabs
- // only for the indentation, and not for alignment, when
- // UseTab = US_ForIndentation.
- unsigned IndentLevel;
// The number of spaces in front of the token or broken part of the token.
// This will be adapted when aligning tokens.
@@ -159,6 +149,14 @@ public:
// the alignment process.
const Change *StartOfBlockComment;
int IndentationOffset;
+
+ // A combination of nesting level and indent level, which are used in
+ // tandem to compute lexical scope, for the purposes of deciding
+ // when to stop consecutive alignment runs.
+ std::pair<unsigned, unsigned>
+ nestingAndIndentLevel() const {
+ return std::make_pair(Tok->NestingLevel, Tok->IndentLevel);
+ }
};
private: