summaryrefslogtreecommitdiff
path: root/lib/Format/ContinuationIndenter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/ContinuationIndenter.cpp')
-rw-r--r--lib/Format/ContinuationIndenter.cpp747
1 files changed, 601 insertions, 146 deletions
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 3bf1cd8f7c13..a3d38b244c5c 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -12,8 +12,9 @@
///
//===----------------------------------------------------------------------===//
-#include "BreakableToken.h"
#include "ContinuationIndenter.h"
+#include "BreakableToken.h"
+#include "FormatInternal.h"
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
@@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok,
(LessTok.Previous && LessTok.Previous->is(tok::equal))));
}
+// Returns the delimiter of a raw string literal, or None if TokenText is not
+// the text of a raw string literal. The delimiter could be the empty string.
+// For example, the delimiter of R"deli(cont)deli" is deli.
+static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
+ if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.
+ || !TokenText.startswith("R\"") || !TokenText.endswith("\""))
+ return None;
+
+ // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has
+ // size at most 16 by the standard, so the first '(' must be among the first
+ // 19 bytes.
+ size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');
+ if (LParenPos == StringRef::npos)
+ return None;
+ StringRef Delimiter = TokenText.substr(2, LParenPos - 2);
+
+ // Check that the string ends in ')Delimiter"'.
+ size_t RParenPos = TokenText.size() - Delimiter.size() - 2;
+ if (TokenText[RParenPos] != ')')
+ return None;
+ if (!TokenText.substr(RParenPos + 1).startswith(Delimiter))
+ return None;
+ return Delimiter;
+}
+
+RawStringFormatStyleManager::RawStringFormatStyleManager(
+ const FormatStyle &CodeStyle) {
+ for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
+ FormatStyle Style;
+ if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,
+ RawStringFormat.Language, &Style)) {
+ Style = getLLVMStyle();
+ Style.Language = RawStringFormat.Language;
+ }
+ Style.ColumnLimit = CodeStyle.ColumnLimit;
+ DelimiterStyle.insert({RawStringFormat.Delimiter, Style});
+ }
+}
+
+llvm::Optional<FormatStyle>
+RawStringFormatStyleManager::get(StringRef Delimiter) const {
+ auto It = DelimiterStyle.find(Delimiter);
+ if (It == DelimiterStyle.end())
+ return None;
+ return It->second;
+}
+
ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
const SourceManager &SourceMgr,
@@ -85,20 +133,32 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
: Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
Whitespaces(Whitespaces), Encoding(Encoding),
BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
- CommentPragmasRegex(Style.CommentPragmas) {}
+ CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+ unsigned FirstStartColumn,
const AnnotatedLine *Line,
bool DryRun) {
LineState State;
State.FirstIndent = FirstIndent;
- State.Column = FirstIndent;
+ if (FirstStartColumn && Line->First->NewlinesBefore == 0)
+ State.Column = FirstStartColumn;
+ else
+ State.Column = FirstIndent;
+ // With preprocessor directive indentation, the line starts on column 0
+ // since it's indented after the hash, but FirstIndent is set to the
+ // preprocessor indent.
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
+ (Line->Type == LT_PreprocessorDirective ||
+ Line->Type == LT_ImportStatement))
+ State.Column = 0;
State.Line = Line;
State.NextToken = Line->First;
State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
/*AvoidBinPacking=*/false,
/*NoLineBreak=*/false));
State.LineContainsContinuedForLoopSection = false;
+ State.NoContinuation = false;
State.StartOfStringLiteral = 0;
State.StartOfLineLevel = 0;
State.LowestLevelOnLine = 0;
@@ -120,9 +180,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
const FormatToken &Current = *State.NextToken;
const FormatToken &Previous = *Current.Previous;
assert(&Previous == Current.Previous);
- if (!Current.CanBreakBefore &&
- !(State.Stack.back().BreakBeforeClosingBrace &&
- Current.closesBlockOrBlockTypeList(Style)))
+ if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace &&
+ Current.closesBlockOrBlockTypeList(Style)))
return false;
// The opening "{" of a braced list has to be on the same line as the first
// element if it is nested in another braced init list or function call.
@@ -264,7 +323,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
// We need special cases for ">>" which we have split into two ">" while
// lexing in order to make template parsing easier.
bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
- Previous.getPrecedence() == prec::Equality) &&
+ Previous.getPrecedence() == prec::Equality ||
+ Previous.getPrecedence() == prec::Spaceship) &&
Previous.Previous &&
Previous.Previous->isNot(TT_BinaryOperator); // For >>.
bool LHSIsBinaryExpr =
@@ -316,6 +376,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
Previous.TokenText == "\'\\n\'"))))
return true;
+ if (Previous.is(TT_BlockComment) && Previous.IsMultiline)
+ return true;
+
+ if (State.NoContinuation)
+ return true;
+
return false;
}
@@ -325,6 +391,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
const FormatToken &Current = *State.NextToken;
assert(!State.Stack.empty());
+ State.NoContinuation = false;
+
if ((Current.is(TT_ImplicitStringLiteral) &&
(Current.Previous->Tok.getIdentifierInfo() == nullptr ||
Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
@@ -376,9 +444,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
+ // Indent preprocessor directives after the hash if required.
+ int PPColumnCorrection = 0;
+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
+ Previous.is(tok::hash) && State.FirstIndent > 0 &&
+ (State.Line->Type == LT_PreprocessorDirective ||
+ State.Line->Type == LT_ImportStatement)) {
+ Spaces += State.FirstIndent;
+
+ // For preprocessor indent with tabs, State.Column will be 1 because of the
+ // hash. This causes second-level indents onward to have an extra space
+ // after the tabs. We avoid this misalignment by subtracting 1 from the
+ // column value passed to replaceWhitespace().
+ if (Style.UseTab != FormatStyle::UT_Never)
+ PPColumnCorrection = -1;
+ }
+
if (!DryRun)
Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
- State.Column + Spaces);
+ State.Column + Spaces + PPColumnCorrection);
// If "BreakBeforeInheritanceComma" mode, don't break within the inheritance
// declaration unless there is multiple inheritance.
@@ -405,9 +489,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak &&
Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
State.Column > getNewLineColumn(State) &&
- (!Previous.Previous ||
- !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while,
- tok::kw_switch)) &&
+ (!Previous.Previous || !Previous.Previous->isOneOf(
+ tok::kw_for, tok::kw_while, tok::kw_switch)) &&
// Don't do this for simple (no expressions) one-argument function calls
// as that feels like needlessly wasting whitespace, e.g.:
//
@@ -454,7 +537,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
(P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
!P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
P->getPrecedence() != prec::Assignment &&
- P->getPrecedence() != prec::Relational) {
+ P->getPrecedence() != prec::Relational &&
+ P->getPrecedence() != prec::Spaceship) {
bool BreakBeforeOperator =
P->MustBreakBefore || P->is(tok::lessless) ||
(P->is(TT_BinaryOperator) &&
@@ -619,8 +703,18 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
State.Stack.back().BreakBeforeParameter = false;
if (!DryRun) {
+ unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;
+ if (Current.is(tok::r_brace) && Current.MatchingParen &&
+ // Only strip trailing empty lines for l_braces that have children, i.e.
+ // for function expressions (lambdas, arrows, etc).
+ !Current.MatchingParen->Children.empty()) {
+ // lambdas and arrow functions are expressions, thus their r_brace is not
+ // on its own line, and thus not covered by UnwrappedLineFormatter's logic
+ // about removing empty lines on closing blocks. Special case them here.
+ MaxEmptyLinesToKeep = 1;
+ }
unsigned Newlines = std::max(
- 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+ 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
bool ContinuePPDirective =
State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
@@ -661,9 +755,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// before the corresponding } or ].
if (PreviousNonComment &&
(PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
- opensProtoMessageField(*PreviousNonComment, Style) ||
- (PreviousNonComment->is(TT_TemplateString) &&
- PreviousNonComment->opensScope())))
+ opensProtoMessageField(*PreviousNonComment, Style)))
State.Stack.back().BreakBeforeClosingBrace = true;
if (State.Stack.back().AvoidBinPacking) {
@@ -731,7 +823,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
return State.Stack[State.Stack.size() - 2].LastSpace;
if (Current.is(tok::identifier) && Current.Next &&
- Current.Next->is(TT_DictLiteral))
+ (Current.Next->is(TT_DictLiteral) ||
+ ((Style.Language == FormatStyle::LK_Proto ||
+ Style.Language == FormatStyle::LK_TextProto) &&
+ Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace))))
return State.Stack.back().Indent;
if (NextNonComment->is(TT_ObjCStringLiteral) &&
State.StartOfStringLiteral != 0)
@@ -871,8 +966,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
// Next(...)
// ^ line up here.
State.Stack.back().Indent =
- State.Column + (Style.BreakConstructorInitializers ==
- FormatStyle::BCIS_BeforeComma ? 0 : 2);
+ State.Column +
+ (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma
+ ? 0
+ : 2);
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
State.Stack.back().AvoidBinPacking = true;
@@ -884,7 +981,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.FirstIndent + Style.ConstructorInitializerIndentWidth;
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
- State.Stack.back().AvoidBinPacking = true;
+ State.Stack.back().AvoidBinPacking = true;
}
if (Current.is(TT_InheritanceColon))
State.Stack.back().Indent =
@@ -912,8 +1009,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Stack[i].NoLineBreak = true;
State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
}
- if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
- Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&
+ if (Previous &&
+ (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
+ Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&
!Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
State.Stack.back().NestedBlockInlined =
!Newline &&
@@ -922,13 +1020,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
moveStatePastFakeLParens(State, Newline);
moveStatePastScopeCloser(State);
- if (Current.is(TT_TemplateString) && Current.opensScope())
- State.Stack.back().LastSpace =
- (Current.IsMultiline ? Current.LastLineColumnWidth
- : State.Column + Current.ColumnWidth) -
- strlen("${");
- bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
- !State.Stack.back().NoLineBreakInOperand;
+ bool AllowBreak = !State.Stack.back().NoLineBreak &&
+ !State.Stack.back().NoLineBreakInOperand;
moveStatePastScopeOpener(State, Newline);
moveStatePastFakeRParens(State);
@@ -942,13 +1035,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
- unsigned Penalty = 0;
- if (CanBreakProtrudingToken)
- Penalty = breakProtrudingToken(Current, State, DryRun);
- if (State.Column > getColumnLimit(State)) {
- unsigned ExcessCharacters = State.Column - getColumnLimit(State);
- Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
- }
+
+ unsigned Penalty =
+ handleEndOfLine(Current, State, DryRun, AllowBreak);
if (Current.Role)
Current.Role->formatFromToken(State, this, DryRun);
@@ -1072,14 +1161,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
bool EndsInComma = Current.MatchingParen &&
Current.MatchingParen->Previous &&
Current.MatchingParen->Previous->is(tok::comma);
- AvoidBinPacking =
- EndsInComma || Current.is(TT_DictLiteral) ||
- Style.Language == FormatStyle::LK_Proto ||
- Style.Language == FormatStyle::LK_TextProto ||
- !Style.BinPackArguments ||
- (NextNoComment &&
- NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
- TT_DesignatedInitializerLSquare));
+ AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) ||
+ Style.Language == FormatStyle::LK_Proto ||
+ Style.Language == FormatStyle::LK_TextProto ||
+ !Style.BinPackArguments ||
+ (NextNoComment &&
+ NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
+ TT_DesignatedInitializerLSquare));
BreakBeforeParameter = EndsInComma;
if (Current.ParameterCount > 1)
NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
@@ -1098,18 +1186,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
LastSpace = std::max(LastSpace, State.Stack.back().Indent);
}
- // JavaScript template strings are special as we always want to indent
- // nested expressions relative to the ${}. Otherwise, this can create quite
- // a mess.
- if (Current.is(TT_TemplateString)) {
- unsigned Column = Current.IsMultiline
- ? Current.LastLineColumnWidth
- : State.Column + Current.ColumnWidth;
- NewIndent = Column;
- LastSpace = Column;
- NestedBlockIndent = Column;
- }
-
bool EndsInComma =
Current.MatchingParen &&
Current.MatchingParen->getPreviousNonComment() &&
@@ -1200,11 +1276,93 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
State.Stack.back().BreakBeforeParameter = true;
}
-unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
- LineState &State) {
- if (!Current.IsMultiline)
+static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
+ unsigned TabWidth,
+ encoding::Encoding Encoding) {
+ size_t LastNewlinePos = Text.find_last_of("\n");
+ if (LastNewlinePos == StringRef::npos) {
+ return StartColumn +
+ encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
+ } else {
+ return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),
+ /*StartColumn=*/0, TabWidth, Encoding);
+ }
+}
+
+unsigned ContinuationIndenter::reformatRawStringLiteral(
+ const FormatToken &Current, LineState &State,
+ const FormatStyle &RawStringStyle, bool DryRun) {
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
+ auto Delimiter = *getRawStringDelimiter(Current.TokenText);
+ // The text of a raw string is between the leading 'R"delimiter(' and the
+ // trailing 'delimiter)"'.
+ unsigned PrefixSize = 3 + Delimiter.size();
+ unsigned SuffixSize = 2 + Delimiter.size();
+
+ // The first start column is the column the raw text starts.
+ unsigned FirstStartColumn = StartColumn + PrefixSize;
+
+ // The next start column is the intended indentation a line break inside
+ // the raw string at level 0. It is determined by the following rules:
+ // - if the content starts on newline, it is one level more than the current
+ // indent, and
+ // - if the content does not start on a newline, it is the first start
+ // column.
+ // These rules have the advantage that the formatted content both does not
+ // violate the rectangle rule and visually flows within the surrounding
+ // source.
+ bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
+ unsigned NextStartColumn = ContentStartsOnNewline
+ ? State.Stack.back().Indent + Style.IndentWidth
+ : FirstStartColumn;
+
+ // The last start column is the column the raw string suffix starts if it is
+ // put on a newline.
+ // The last start column is the intended indentation of the raw string postfix
+ // if it is put on a newline. It is determined by the following rules:
+ // - if the raw string prefix starts on a newline, it is the column where
+ // that raw string prefix starts, and
+ // - if the raw string prefix does not start on a newline, it is the current
+ // indent.
+ unsigned LastStartColumn = Current.NewlinesBefore
+ ? FirstStartColumn - PrefixSize
+ : State.Stack.back().Indent;
+
+ std::string RawText =
+ Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
+
+ std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
+ RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
+ FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
+ /*Status=*/nullptr);
+
+ auto NewCode = applyAllReplacements(RawText, Fixes.first);
+ tooling::Replacements NoFixes;
+ if (!NewCode) {
+ State.Column += Current.ColumnWidth;
return 0;
+ }
+ if (!DryRun) {
+ SourceLocation OriginLoc =
+ Current.Tok.getLocation().getLocWithOffset(PrefixSize);
+ for (const tooling::Replacement &Fix : Fixes.first) {
+ auto Err = Whitespaces.addReplacement(tooling::Replacement(
+ SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
+ Fix.getLength(), Fix.getReplacementText()));
+ if (Err) {
+ llvm::errs() << "Failed to reformat raw string: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+ }
+ }
+ unsigned RawLastLineEndColumn = getLastLineEndColumn(
+ *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
+ State.Column = RawLastLineEndColumn + SuffixSize;
+ return Fixes.second;
+}
+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
+ LineState &State) {
// Break before further function parameters on all levels.
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
@@ -1219,33 +1377,85 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
return 0;
}
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
- LineState &State,
- bool DryRun) {
- // Don't break multi-line tokens other than block comments. Instead, just
- // update the state.
- if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
- return addMultilineToken(Current, State);
-
- // Don't break implicit string literals or import statements.
- if (Current.is(TT_ImplicitStringLiteral) ||
- State.Line->Type == LT_ImportStatement)
- return 0;
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+ LineState &State, bool DryRun,
+ bool AllowBreak) {
+ unsigned Penalty = 0;
+ // Compute the raw string style to use in case this is a raw string literal
+ // that can be reformatted.
+ auto RawStringStyle = getRawStringStyle(Current, State);
+ if (RawStringStyle) {
+ Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+ } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+ // Don't break multi-line tokens other than block comments and raw string
+ // literals. Instead, just update the state.
+ Penalty = addMultilineToken(Current, State);
+ } else if (State.Line->Type != LT_ImportStatement) {
+ // We generally don't break import statements.
+ LineState OriginalState = State;
+
+ // Whether we force the reflowing algorithm to stay strictly within the
+ // column limit.
+ bool Strict = false;
+ // Whether the first non-strict attempt at reflowing did intentionally
+ // exceed the column limit.
+ bool Exceeded = false;
+ std::tie(Penalty, Exceeded) = breakProtrudingToken(
+ Current, State, AllowBreak, /*DryRun=*/true, Strict);
+ if (Exceeded) {
+ // If non-strict reflowing exceeds the column limit, try whether strict
+ // reflowing leads to an overall lower penalty.
+ LineState StrictState = OriginalState;
+ unsigned StrictPenalty =
+ breakProtrudingToken(Current, StrictState, AllowBreak,
+ /*DryRun=*/true, /*Strict=*/true)
+ .first;
+ Strict = StrictPenalty <= Penalty;
+ if (Strict) {
+ Penalty = StrictPenalty;
+ State = StrictState;
+ }
+ }
+ if (!DryRun) {
+ // If we're not in dry-run mode, apply the changes with the decision on
+ // strictness made above.
+ breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
+ Strict);
+ }
+ }
+ if (State.Column > getColumnLimit(State)) {
+ unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+ }
+ return Penalty;
+}
- if (!Current.isStringLiteral() && !Current.is(tok::comment))
- return 0;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+ const LineState &State) {
+ if (!Current.isStringLiteral())
+ return None;
+ auto Delimiter = getRawStringDelimiter(Current.TokenText);
+ if (!Delimiter)
+ return None;
+ auto RawStringStyle = RawStringFormats.get(*Delimiter);
+ if (!RawStringStyle)
+ return None;
+ RawStringStyle->ColumnLimit = getColumnLimit(State);
+ return RawStringStyle;
+}
- std::unique_ptr<BreakableToken> Token;
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+ const FormatToken &Current, LineState &State, bool AllowBreak) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
- unsigned ColumnLimit = getColumnLimit(State);
-
if (Current.isStringLiteral()) {
// FIXME: String literal breaking is currently disabled for Java and JS, as
// it requires strings to be merged using "+" which we don't support.
if (Style.Language == FormatStyle::LK_Java ||
Style.Language == FormatStyle::LK_JavaScript ||
- !Style.BreakStringLiterals)
- return 0;
+ !Style.BreakStringLiterals ||
+ !AllowBreak)
+ return nullptr;
// Don't break string literals inside preprocessor directives (except for
// #define directives, as their contents are stored in separate lines and
@@ -1253,11 +1463,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// This way we avoid breaking code with line directives and unknown
// preprocessor directives that contain long string literals.
if (State.Line->Type == LT_PreprocessorDirective)
- return 0;
+ return nullptr;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
- return 0;
+ return nullptr;
StringRef Text = Current.TokenText;
StringRef Prefix;
@@ -1272,114 +1482,359 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
Text.startswith(Prefix = "u8\"") ||
Text.startswith(Prefix = "L\""))) ||
(Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
- Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
- Postfix, State.Line->InPPDirective,
- Encoding, Style));
- } else {
- return 0;
+ return llvm::make_unique<BreakableStringLiteral>(
+ Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,
+ Encoding, Style);
}
} else if (Current.is(TT_BlockComment)) {
- if (!Current.isTrailingComment() || !Style.ReflowComments ||
+ if (!Style.ReflowComments ||
// If a comment token switches formatting, like
// /* clang-format on */, we don't want to break it further,
// but we may still want to adjust its indentation.
- switchesFormatting(Current))
- return addMultilineToken(Current, State);
- Token.reset(new BreakableBlockComment(
+ switchesFormatting(Current)) {
+ return nullptr;
+ }
+ return llvm::make_unique<BreakableBlockComment>(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
- State.Line->InPPDirective, Encoding, Style));
+ State.Line->InPPDirective, Encoding, Style);
} else if (Current.is(TT_LineComment) &&
(Current.Previous == nullptr ||
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
if (!Style.ReflowComments ||
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
switchesFormatting(Current))
- return 0;
- Token.reset(new BreakableLineCommentSection(
+ return nullptr;
+ return llvm::make_unique<BreakableLineCommentSection>(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
- /*InPPDirective=*/false, Encoding, Style));
+ /*InPPDirective=*/false, Encoding, Style);
+ }
+ return nullptr;
+}
+
+std::pair<unsigned, bool>
+ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+ LineState &State, bool AllowBreak,
+ bool DryRun, bool Strict) {
+ std::unique_ptr<const BreakableToken> Token =
+ createBreakableToken(Current, State, AllowBreak);
+ if (!Token)
+ return {0, false};
+ assert(Token->getLineCount() > 0);
+ unsigned ColumnLimit = getColumnLimit(State);
+ if (Current.is(TT_LineComment)) {
// We don't insert backslashes when breaking line comments.
ColumnLimit = Style.ColumnLimit;
- } else {
- return 0;
}
if (Current.UnbreakableTailLength >= ColumnLimit)
- return 0;
-
- unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
- bool BreakInserted = false;
+ return {0, false};
+ // ColumnWidth was already accounted into State.Column before calling
+ // breakProtrudingToken.
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
+ unsigned NewBreakPenalty = Current.isStringLiteral()
+ ? Style.PenaltyBreakString
+ : Style.PenaltyBreakComment;
+ // Stores whether we intentionally decide to let a line exceed the column
+ // limit.
+ bool Exceeded = false;
+ // Stores whether we introduce a break anywhere in the token.
+ bool BreakInserted = Token->introducesBreakBeforeToken();
+ // Store whether we inserted a new line break at the end of the previous
+ // logical line.
+ bool NewBreakBefore = false;
// We use a conservative reflowing strategy. Reflow starts after a line is
// broken or the corresponding whitespace compressed. Reflow ends as soon as a
// line that doesn't get reflown with the previous line is reached.
- bool ReflowInProgress = false;
+ bool Reflow = false;
+ // Keep track of where we are in the token:
+ // Where we are in the content of the current logical line.
+ unsigned TailOffset = 0;
+ // The column number we're currently at.
+ unsigned ContentStartColumn =
+ Token->getContentStartColumn(0, /*Break=*/false);
+ // The number of columns left in the current logical line after TailOffset.
+ unsigned RemainingTokenColumns =
+ Token->getRemainingLength(0, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(0, Whitespaces);
+
unsigned Penalty = 0;
- unsigned RemainingTokenColumns = 0;
+ DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn
+ << ".\n");
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
LineIndex != EndIndex; ++LineIndex) {
- BreakableToken::Split SplitBefore(StringRef::npos, 0);
- if (ReflowInProgress) {
- SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, CommentPragmasRegex);
- }
- ReflowInProgress = SplitBefore.first != StringRef::npos;
- unsigned TailOffset =
- ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
- if (!DryRun)
- Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
- RemainingSpace, SplitBefore, Whitespaces);
- RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
- LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
- while (RemainingTokenColumns > RemainingSpace) {
- BreakableToken::Split Split = Token->getSplit(
- LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+ DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow
+ << ")\n");
+ NewBreakBefore = false;
+ // If we did reflow the previous line, we'll try reflowing again. Otherwise
+ // we'll start reflowing if the current line is broken or whitespace is
+ // compressed.
+ bool TryReflow = Reflow;
+ // Break the current token until we can fit the rest of the line.
+ while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the current token doesn't fit, find the latest possible split in the
+ // current line so that breaking at it will be under the column limit.
+ // FIXME: Use the earliest possible split while reflowing to correctly
+ // compress whitespace within a line.
+ BreakableToken::Split Split =
+ Token->getSplit(LineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
if (Split.first == StringRef::npos) {
- // The last line's penalty is handled in addNextStateToQueue().
+ // No break opportunity - update the penalty and continue with the next
+ // logical line.
if (LineIndex < EndIndex - 1)
+ // The last line's penalty is handled in addNextStateToQueue().
Penalty += Style.PenaltyExcessCharacter *
- (RemainingTokenColumns - RemainingSpace);
+ (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
+ DEBUG(llvm::dbgs() << " No break opportunity.\n");
break;
}
assert(Split.first != 0);
- // Check if compressing the whitespace range will bring the line length
- // under the limit. If that is the case, we perform whitespace compression
- // instead of inserting a line break.
- unsigned RemainingTokenColumnsAfterCompression =
- Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
- if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
- RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
- ReflowInProgress = true;
- if (!DryRun)
- Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
- break;
+ if (Token->supportsReflow()) {
+ // Check whether the next natural split point after the current one can
+ // still fit the line, either because we can compress away whitespace,
+ // or because the penalty the excess characters introduce is lower than
+ // the break penalty.
+ // We only do this for tokens that support reflowing, and thus allow us
+ // to change the whitespace arbitrarily (e.g. comments).
+ // Other tokens, like string literals, can be broken on arbitrary
+ // positions.
+
+ // First, compute the columns from TailOffset to the next possible split
+ // position.
+ // For example:
+ // ColumnLimit: |
+ // // Some text that breaks
+ // ^ tail offset
+ // ^-- split
+ // ^-------- to split columns
+ // ^--- next split
+ // ^--------------- to next split columns
+ unsigned ToSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset, Split.first, ContentStartColumn);
+ DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n");
+
+ BreakableToken::Split NextSplit = Token->getSplit(
+ LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+ ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
+ // Compute the columns necessary to fit the next non-breakable sequence
+ // into the current line.
+ unsigned ToNextSplitColumns = 0;
+ if (NextSplit.first == StringRef::npos) {
+ ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
+ ContentStartColumn);
+ } else {
+ ToNextSplitColumns = Token->getRangeLength(
+ LineIndex, TailOffset,
+ Split.first + Split.second + NextSplit.first, ContentStartColumn);
+ }
+ // Compress the whitespace between the break and the start of the next
+ // unbreakable sequence.
+ ToNextSplitColumns =
+ Token->getLengthAfterCompression(ToNextSplitColumns, Split);
+ DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn
+ << "\n");
+ DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n");
+ // If the whitespace compression makes us fit, continue on the current
+ // line.
+ bool ContinueOnLine =
+ ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
+ unsigned ExcessCharactersPenalty = 0;
+ if (!ContinueOnLine && !Strict) {
+ // Similarly, if the excess characters' penalty is lower than the
+ // penalty of introducing a new break, continue on the current line.
+ ExcessCharactersPenalty =
+ (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ DEBUG(llvm::dbgs()
+ << " Penalty excess: " << ExcessCharactersPenalty
+ << "\n break : " << NewBreakPenalty << "\n");
+ if (ExcessCharactersPenalty < NewBreakPenalty) {
+ Exceeded = true;
+ ContinueOnLine = true;
+ }
+ }
+ if (ContinueOnLine) {
+ DEBUG(llvm::dbgs() << " Continuing on line...\n");
+ // The current line fits after compressing the whitespace - reflow
+ // the next line into it if possible.
+ TryReflow = true;
+ if (!DryRun)
+ Token->compressWhitespace(LineIndex, TailOffset, Split,
+ Whitespaces);
+ // When we continue on the same line, leave one space between content.
+ ContentStartColumn += ToSplitColumns + 1;
+ Penalty += ExcessCharactersPenalty;
+ TailOffset += Split.first + Split.second;
+ RemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset, ContentStartColumn);
+ continue;
+ }
}
-
- unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
- LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+ DEBUG(llvm::dbgs() << " Breaking...\n");
+ ContentStartColumn =
+ Token->getContentStartColumn(LineIndex, /*Break=*/true);
+ unsigned NewRemainingTokenColumns = Token->getRemainingLength(
+ LineIndex, TailOffset + Split.first + Split.second,
+ ContentStartColumn);
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
- if (NewRemainingTokenColumns == RemainingTokenColumns)
+ if (NewRemainingTokenColumns == RemainingTokenColumns) {
+ // FIXME: Do we need to adjust the penalty?
break;
-
+ }
assert(NewRemainingTokenColumns < RemainingTokenColumns);
+
+ DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
+ << ", " << Split.second << "\n");
if (!DryRun)
Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
- Penalty += Current.SplitPenalty;
- unsigned ColumnsUsed =
- Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
- if (ColumnsUsed > ColumnLimit) {
- Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
- }
+
+ Penalty += NewBreakPenalty;
TailOffset += Split.first + Split.second;
RemainingTokenColumns = NewRemainingTokenColumns;
- ReflowInProgress = true;
BreakInserted = true;
+ NewBreakBefore = true;
}
+ // In case there's another line, prepare the state for the start of the next
+ // line.
+ if (LineIndex + 1 != EndIndex) {
+ unsigned NextLineIndex = LineIndex + 1;
+ if (NewBreakBefore)
+ // After breaking a line, try to reflow the next line into the current
+ // one once RemainingTokenColumns fits.
+ TryReflow = true;
+ if (TryReflow) {
+ // We decided that we want to try reflowing the next line into the
+ // current one.
+ // We will now adjust the state as if the reflow is successful (in
+ // preparation for the next line), and see whether that works. If we
+ // decide that we cannot reflow, we will later reset the state to the
+ // start of the next line.
+ Reflow = false;
+ // As we did not continue breaking the line, RemainingTokenColumns is
+ // known to fit after ContentStartColumn. Adapt ContentStartColumn to
+ // the position at which we want to format the next line if we do
+ // actually reflow.
+ // When we reflow, we need to add a space between the end of the current
+ // line and the next line's start column.
+ ContentStartColumn += RemainingTokenColumns + 1;
+ // Get the split that we need to reflow next logical line into the end
+ // of the current one; the split will include any leading whitespace of
+ // the next logical line.
+ BreakableToken::Split SplitBeforeNext =
+ Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
+ DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn
+ << "\n Potential reflow split: ");
+ if (SplitBeforeNext.first != StringRef::npos) {
+ DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
+ << SplitBeforeNext.second << "\n");
+ TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
+ // If the rest of the next line fits into the current line below the
+ // column limit, we can safely reflow.
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ Reflow = true;
+ if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Over limit after reflow, need: "
+ << (ContentStartColumn + RemainingTokenColumns)
+ << ", space: " << ColumnLimit
+ << ", reflown prefix: " << ContentStartColumn
+ << ", offset in line: " << TailOffset << "\n");
+ // If the whole next line does not fit, try to find a point in
+ // the next line at which we can break so that attaching the part
+ // of the next line to that break point onto the current line is
+ // below the column limit.
+ BreakableToken::Split Split =
+ Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+ ContentStartColumn, CommentPragmasRegex);
+ if (Split.first == StringRef::npos) {
+ DEBUG(llvm::dbgs() << " Did not find later break\n");
+ Reflow = false;
+ } else {
+ // Check whether the first split point gets us below the column
+ // limit. Note that we will execute this split below as part of
+ // the normal token breaking and reflow logic within the line.
+ unsigned ToSplitColumns = Token->getRangeLength(
+ NextLineIndex, TailOffset, Split.first, ContentStartColumn);
+ if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
+ DEBUG(llvm::dbgs() << " Next split protrudes, need: "
+ << (ContentStartColumn + ToSplitColumns)
+ << ", space: " << ColumnLimit);
+ unsigned ExcessCharactersPenalty =
+ (ContentStartColumn + ToSplitColumns - ColumnLimit) *
+ Style.PenaltyExcessCharacter;
+ if (NewBreakPenalty < ExcessCharactersPenalty) {
+ Reflow = false;
+ }
+ }
+ }
+ }
+ } else {
+ DEBUG(llvm::dbgs() << "not found.\n");
+ }
+ }
+ if (!Reflow) {
+ // If we didn't reflow into the next line, the only space to consider is
+ // the next logical line. Reset our state to match the start of the next
+ // line.
+ TailOffset = 0;
+ ContentStartColumn =
+ Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
+ RemainingTokenColumns = Token->getRemainingLength(
+ NextLineIndex, TailOffset, ContentStartColumn);
+ // Adapt the start of the token, for example indent.
+ if (!DryRun)
+ Token->adaptStartOfLine(NextLineIndex, Whitespaces);
+ } else {
+ // If we found a reflow split and have added a new break before the next
+ // line, we are going to remove the line break at the start of the next
+ // logical line. For example, here we'll add a new line break after
+ // 'text', and subsequently delete the line break between 'that' and
+ // 'reflows'.
+ // // some text that
+ // // reflows
+ // ->
+ // // some text
+ // // that reflows
+ // When adding the line break, we also added the penalty for it, so we
+ // need to subtract that penalty again when we remove the line break due
+ // to reflowing.
+ if (NewBreakBefore) {
+ assert(Penalty >= NewBreakPenalty);
+ Penalty -= NewBreakPenalty;
+ }
+ if (!DryRun)
+ Token->reflow(NextLineIndex, Whitespaces);
+ }
+ }
+ }
+
+ BreakableToken::Split SplitAfterLastLine =
+ Token->getSplitAfterLastLine(TailOffset);
+ if (SplitAfterLastLine.first != StringRef::npos) {
+ DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
+ if (!DryRun)
+ Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
+ Whitespaces);
+ ContentStartColumn =
+ Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
+ RemainingTokenColumns = Token->getRemainingLength(
+ Token->getLineCount() - 1,
+ TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
+ ContentStartColumn);
}
- State.Column = RemainingTokenColumns;
+ State.Column = ContentStartColumn + RemainingTokenColumns -
+ Current.UnbreakableTailLength;
if (BreakInserted) {
// If we break the token inside a parameter list, we need to break before
@@ -1390,15 +1845,15 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
State.Stack[i].BreakBeforeParameter = true;
}
- Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString
- : Style.PenaltyBreakComment;
+ if (Current.is(TT_BlockComment))
+ State.NoContinuation = true;
State.Stack.back().LastSpace = StartColumn;
}
Token->updateNextToken(State);
- return Penalty;
+ return {Penalty, Exceeded};
}
unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {