diff options
Diffstat (limited to 'lib/Format/ContinuationIndenter.cpp')
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 747 |
1 files changed, 601 insertions, 146 deletions
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 3bf1cd8f7c13..a3d38b244c5c 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -12,8 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "BreakableToken.h" #include "ContinuationIndenter.h" +#include "BreakableToken.h" +#include "FormatInternal.h" #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok, (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. The delimiter could be the empty string. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. + || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has + // size at most 16 by the standard, so the first '(' must be among the first + // 19 bytes. + size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"'. + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + +RawStringFormatStyleManager::RawStringFormatStyleManager( + const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &Style)) { + Style = getLLVMStyle(); + Style.Language = RawStringFormat.Language; + } + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } +} + +llvm::Optional<FormatStyle> +RawStringFormatStyleManager::get(StringRef Delimiter) const { + auto It = DelimiterStyle.find(Delimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,20 +133,32 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun) { LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + if (FirstStartColumn && Line->First->NewlinesBefore == 0) + State.Column = FirstStartColumn; + else + State.Column = FirstIndent; + // With preprocessor directive indentation, the line starts on column 0 + // since it's indented after the hash, but FirstIndent is set to the + // preprocessor indent. + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && + (Line->Type == LT_PreprocessorDirective || + Line->Type == LT_ImportStatement)) + State.Column = 0; State.Line = Line; State.NextToken = Line->First; State.Stack.push_back(ParenState(FirstIndent, FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; + State.NoContinuation = false; State.StartOfStringLiteral = 0; State.StartOfLineLevel = 0; State.LowestLevelOnLine = 0; @@ -120,9 +180,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) { const FormatToken &Current = *State.NextToken; const FormatToken &Previous = *Current.Previous; assert(&Previous == Current.Previous); - if (!Current.CanBreakBefore && - !(State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style))) + if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && + Current.closesBlockOrBlockTypeList(Style))) return false; // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. @@ -264,7 +323,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // We need special cases for ">>" which we have split into two ">" while // lexing in order to make template parsing easier. bool IsComparison = (Previous.getPrecedence() == prec::Relational || - Previous.getPrecedence() == prec::Equality) && + Previous.getPrecedence() == prec::Equality || + Previous.getPrecedence() == prec::Spaceship) && Previous.Previous && Previous.Previous->isNot(TT_BinaryOperator); // For >>. bool LHSIsBinaryExpr = @@ -316,6 +376,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Previous.TokenText == "\'\\n\'")))) return true; + if (Previous.is(TT_BlockComment) && Previous.IsMultiline) + return true; + + if (State.NoContinuation) + return true; + return false; } @@ -325,6 +391,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, const FormatToken &Current = *State.NextToken; assert(!State.Stack.empty()); + State.NoContinuation = false; + if ((Current.is(TT_ImplicitStringLiteral) && (Current.Previous->Tok.getIdentifierInfo() == nullptr || Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == @@ -376,9 +444,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; + // Indent preprocessor directives after the hash if required. + int PPColumnCorrection = 0; + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && + Previous.is(tok::hash) && State.FirstIndent > 0 && + (State.Line->Type == LT_PreprocessorDirective || + State.Line->Type == LT_ImportStatement)) { + Spaces += State.FirstIndent; + + // For preprocessor indent with tabs, State.Column will be 1 because of the + // hash. This causes second-level indents onward to have an extra space + // after the tabs. We avoid this misalignment by subtracting 1 from the + // column value passed to replaceWhitespace(). + if (Style.UseTab != FormatStyle::UT_Never) + PPColumnCorrection = -1; + } + if (!DryRun) Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, - State.Column + Spaces); + State.Column + Spaces + PPColumnCorrection); // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance // declaration unless there is multiple inheritance. @@ -405,9 +489,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak && Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && State.Column > getNewLineColumn(State) && - (!Previous.Previous || - !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while, - tok::kw_switch)) && + (!Previous.Previous || !Previous.Previous->isOneOf( + tok::kw_for, tok::kw_while, tok::kw_switch)) && // Don't do this for simple (no expressions) one-argument function calls // as that feels like needlessly wasting whitespace, e.g.: // @@ -454,7 +537,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, (P->is(TT_ConditionalExpr) && P->is(tok::colon))) && !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) && P->getPrecedence() != prec::Assignment && - P->getPrecedence() != prec::Relational) { + P->getPrecedence() != prec::Relational && + P->getPrecedence() != prec::Spaceship) { bool BreakBeforeOperator = P->MustBreakBefore || P->is(tok::lessless) || (P->is(TT_BinaryOperator) && @@ -619,8 +703,18 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, State.Stack.back().BreakBeforeParameter = false; if (!DryRun) { + unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1; + if (Current.is(tok::r_brace) && Current.MatchingParen && + // Only strip trailing empty lines for l_braces that have children, i.e. + // for function expressions (lambdas, arrows, etc). + !Current.MatchingParen->Children.empty()) { + // lambdas and arrow functions are expressions, thus their r_brace is not + // on its own line, and thus not covered by UnwrappedLineFormatter's logic + // about removing empty lines on closing blocks. Special case them here. + MaxEmptyLinesToKeep = 1; + } unsigned Newlines = std::max( - 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); + 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep)); bool ContinuePPDirective = State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, @@ -661,9 +755,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // before the corresponding } or ]. if (PreviousNonComment && (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - opensProtoMessageField(*PreviousNonComment, Style) || - (PreviousNonComment->is(TT_TemplateString) && - PreviousNonComment->opensScope()))) + opensProtoMessageField(*PreviousNonComment, Style))) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { @@ -731,7 +823,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) return State.Stack[State.Stack.size() - 2].LastSpace; if (Current.is(tok::identifier) && Current.Next && - Current.Next->is(TT_DictLiteral)) + (Current.Next->is(TT_DictLiteral) || + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace)))) return State.Stack.back().Indent; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) @@ -871,8 +966,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // Next(...) // ^ line up here. State.Stack.back().Indent = - State.Column + (Style.BreakConstructorInitializers == - FormatStyle::BCIS_BeforeComma ? 0 : 2); + State.Column + + (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma + ? 0 + : 2); State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; @@ -884,7 +981,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.FirstIndent + Style.ConstructorInitializerIndentWidth; State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - State.Stack.back().AvoidBinPacking = true; + State.Stack.back().AvoidBinPacking = true; } if (Current.is(TT_InheritanceColon)) State.Stack.back().Indent = @@ -912,8 +1009,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack[i].NoLineBreak = true; State.Stack[State.Stack.size() - 2].NestedBlockInlined = false; } - if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && + if (Previous && + (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || + Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { State.Stack.back().NestedBlockInlined = !Newline && @@ -922,13 +1020,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, moveStatePastFakeLParens(State, Newline); moveStatePastScopeCloser(State); - if (Current.is(TT_TemplateString) && Current.opensScope()) - State.Stack.back().LastSpace = - (Current.IsMultiline ? Current.LastLineColumnWidth - : State.Column + Current.ColumnWidth) - - strlen("${"); - bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak && - !State.Stack.back().NoLineBreakInOperand; + bool AllowBreak = !State.Stack.back().NoLineBreak && + !State.Stack.back().NoLineBreakInOperand; moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); @@ -942,13 +1035,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; - unsigned Penalty = 0; - if (CanBreakProtrudingToken) - Penalty = breakProtrudingToken(Current, State, DryRun); - if (State.Column > getColumnLimit(State)) { - unsigned ExcessCharacters = State.Column - getColumnLimit(State); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } + + unsigned Penalty = + handleEndOfLine(Current, State, DryRun, AllowBreak); if (Current.Role) Current.Role->formatFromToken(State, this, DryRun); @@ -1072,14 +1161,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, bool EndsInComma = Current.MatchingParen && Current.MatchingParen->Previous && Current.MatchingParen->Previous->is(tok::comma); - AvoidBinPacking = - EndsInComma || Current.is(TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto || - !Style.BinPackArguments || - (NextNoComment && - NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, - TT_DesignatedInitializerLSquare)); + AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto || + !Style.BinPackArguments || + (NextNoComment && + NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, + TT_DesignatedInitializerLSquare)); BreakBeforeParameter = EndsInComma; if (Current.ParameterCount > 1) NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1); @@ -1098,18 +1186,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, LastSpace = std::max(LastSpace, State.Stack.back().Indent); } - // JavaScript template strings are special as we always want to indent - // nested expressions relative to the ${}. Otherwise, this can create quite - // a mess. - if (Current.is(TT_TemplateString)) { - unsigned Column = Current.IsMultiline - ? Current.LastLineColumnWidth - : State.Column + Current.ColumnWidth; - NewIndent = Column; - LastSpace = Column; - NestedBlockIndent = Column; - } - bool EndsInComma = Current.MatchingParen && Current.MatchingParen->getPreviousNonComment() && @@ -1200,11 +1276,93 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.back().BreakBeforeParameter = true; } -unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, - LineState &State) { - if (!Current.IsMultiline) +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +unsigned ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, LineState &State, + const FormatStyle &RawStringStyle, bool DryRun) { + unsigned StartColumn = State.Column - Current.ColumnWidth; + auto Delimiter = *getRawStringDelimiter(Current.TokenText); + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing 'delimiter)"'. + unsigned PrefixSize = 3 + Delimiter.size(); + unsigned SuffixSize = 2 + Delimiter.size(); + + // The first start column is the column the raw text starts. + unsigned FirstStartColumn = StartColumn + PrefixSize; + + // The next start column is the intended indentation a line break inside + // the raw string at level 0. It is determined by the following rules: + // - if the content starts on newline, it is one level more than the current + // indent, and + // - if the content does not start on a newline, it is the first start + // column. + // These rules have the advantage that the formatted content both does not + // violate the rectangle rule and visually flows within the surrounding + // source. + bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + unsigned NextStartColumn = ContentStartsOnNewline + ? State.Stack.back().Indent + Style.IndentWidth + : FirstStartColumn; + + // The last start column is the column the raw string suffix starts if it is + // put on a newline. + // The last start column is the intended indentation of the raw string postfix + // if it is put on a newline. It is determined by the following rules: + // - if the raw string prefix starts on a newline, it is the column where + // that raw string prefix starts, and + // - if the raw string prefix does not start on a newline, it is the current + // indent. + unsigned LastStartColumn = Current.NewlinesBefore + ? FirstStartColumn - PrefixSize + : State.Stack.back().Indent; + + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( + RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>", + /*Status=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes.first); + tooling::Replacements NoFixes; + if (!NewCode) { + State.Column += Current.ColumnWidth; return 0; + } + if (!DryRun) { + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + for (const tooling::Replacement &Fix : Fixes.first) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), + Fix.getLength(), Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + } + unsigned RawLastLineEndColumn = getLastLineEndColumn( + *NewCode, FirstStartColumn, Style.TabWidth, Encoding); + State.Column = RawLastLineEndColumn + SuffixSize; + return Fixes.second; +} +unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, + LineState &State) { // Break before further function parameters on all levels. for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; @@ -1219,33 +1377,85 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, return 0; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) - return addMultilineToken(Current, State); - - // Don't break implicit string literals or import statements. - if (Current.is(TT_ImplicitStringLiteral) || - State.Line->Type == LT_ImportStatement) - return 0; +unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, + LineState &State, bool DryRun, + bool AllowBreak) { + unsigned Penalty = 0; + // Compute the raw string style to use in case this is a raw string literal + // that can be reformatted. + auto RawStringStyle = getRawStringStyle(Current, State); + if (RawStringStyle) { + Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun); + } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) { + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + Penalty = addMultilineToken(Current, State); + } else if (State.Line->Type != LT_ImportStatement) { + // We generally don't break import statements. + LineState OriginalState = State; + + // Whether we force the reflowing algorithm to stay strictly within the + // column limit. + bool Strict = false; + // Whether the first non-strict attempt at reflowing did intentionally + // exceed the column limit. + bool Exceeded = false; + std::tie(Penalty, Exceeded) = breakProtrudingToken( + Current, State, AllowBreak, /*DryRun=*/true, Strict); + if (Exceeded) { + // If non-strict reflowing exceeds the column limit, try whether strict + // reflowing leads to an overall lower penalty. + LineState StrictState = OriginalState; + unsigned StrictPenalty = + breakProtrudingToken(Current, StrictState, AllowBreak, + /*DryRun=*/true, /*Strict=*/true) + .first; + Strict = StrictPenalty <= Penalty; + if (Strict) { + Penalty = StrictPenalty; + State = StrictState; + } + } + if (!DryRun) { + // If we're not in dry-run mode, apply the changes with the decision on + // strictness made above. + breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false, + Strict); + } + } + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; + } + return Penalty; +} - if (!Current.isStringLiteral() && !Current.is(tok::comment)) - return 0; +llvm::Optional<FormatStyle> +ContinuationIndenter::getRawStringStyle(const FormatToken &Current, + const LineState &State) { + if (!Current.isStringLiteral()) + return None; + auto Delimiter = getRawStringDelimiter(Current.TokenText); + if (!Delimiter) + return None; + auto RawStringStyle = RawStringFormats.get(*Delimiter); + if (!RawStringStyle) + return None; + RawStringStyle->ColumnLimit = getColumnLimit(State); + return RawStringStyle; +} - std::unique_ptr<BreakableToken> Token; +std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken( + const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; - unsigned ColumnLimit = getColumnLimit(State); - if (Current.isStringLiteral()) { // FIXME: String literal breaking is currently disabled for Java and JS, as // it requires strings to be merged using "+" which we don't support. if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals) - return 0; + !Style.BreakStringLiterals || + !AllowBreak) + return nullptr; // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -1253,11 +1463,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // This way we avoid breaking code with line directives and unknown // preprocessor directives that contain long string literals. if (State.Line->Type == LT_PreprocessorDirective) - return 0; + return nullptr; // Exempts unterminated string literals from line breaking. The user will // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) - return 0; + return nullptr; StringRef Text = Current.TokenText; StringRef Prefix; @@ -1272,114 +1482,359 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, - Postfix, State.Line->InPPDirective, - Encoding, Style)); - } else { - return 0; + return llvm::make_unique<BreakableStringLiteral>( + Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective, + Encoding, Style); } } else if (Current.is(TT_BlockComment)) { - if (!Current.isTrailingComment() || !Style.ReflowComments || + if (!Style.ReflowComments || // If a comment token switches formatting, like // /* clang-format on */, we don't want to break it further, // but we may still want to adjust its indentation. - switchesFormatting(Current)) - return addMultilineToken(Current, State); - Token.reset(new BreakableBlockComment( + switchesFormatting(Current)) { + return nullptr; + } + return llvm::make_unique<BreakableBlockComment>( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - State.Line->InPPDirective, Encoding, Style)); + State.Line->InPPDirective, Encoding, Style); } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || switchesFormatting(Current)) - return 0; - Token.reset(new BreakableLineCommentSection( + return nullptr; + return llvm::make_unique<BreakableLineCommentSection>( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - /*InPPDirective=*/false, Encoding, Style)); + /*InPPDirective=*/false, Encoding, Style); + } + return nullptr; +} + +std::pair<unsigned, bool> +ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, bool AllowBreak, + bool DryRun, bool Strict) { + std::unique_ptr<const BreakableToken> Token = + createBreakableToken(Current, State, AllowBreak); + if (!Token) + return {0, false}; + assert(Token->getLineCount() > 0); + unsigned ColumnLimit = getColumnLimit(State); + if (Current.is(TT_LineComment)) { // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; - } else { - return 0; } if (Current.UnbreakableTailLength >= ColumnLimit) - return 0; - - unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; - bool BreakInserted = false; + return {0, false}; + // ColumnWidth was already accounted into State.Column before calling + // breakProtrudingToken. + unsigned StartColumn = State.Column - Current.ColumnWidth; + unsigned NewBreakPenalty = Current.isStringLiteral() + ? Style.PenaltyBreakString + : Style.PenaltyBreakComment; + // Stores whether we intentionally decide to let a line exceed the column + // limit. + bool Exceeded = false; + // Stores whether we introduce a break anywhere in the token. + bool BreakInserted = Token->introducesBreakBeforeToken(); + // Store whether we inserted a new line break at the end of the previous + // logical line. + bool NewBreakBefore = false; // We use a conservative reflowing strategy. Reflow starts after a line is // broken or the corresponding whitespace compressed. Reflow ends as soon as a // line that doesn't get reflown with the previous line is reached. - bool ReflowInProgress = false; + bool Reflow = false; + // Keep track of where we are in the token: + // Where we are in the content of the current logical line. + unsigned TailOffset = 0; + // The column number we're currently at. + unsigned ContentStartColumn = + Token->getContentStartColumn(0, /*Break=*/false); + // The number of columns left in the current logical line after TailOffset. + unsigned RemainingTokenColumns = + Token->getRemainingLength(0, TailOffset, ContentStartColumn); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->adaptStartOfLine(0, Whitespaces); + unsigned Penalty = 0; - unsigned RemainingTokenColumns = 0; + DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn + << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - BreakableToken::Split SplitBefore(StringRef::npos, 0); - if (ReflowInProgress) { - SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, CommentPragmasRegex); - } - ReflowInProgress = SplitBefore.first != StringRef::npos; - unsigned TailOffset = - ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, SplitBefore, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( - LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); - while (RemainingTokenColumns > RemainingSpace) { - BreakableToken::Split Split = Token->getSplit( - LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow + << ")\n"); + NewBreakBefore = false; + // If we did reflow the previous line, we'll try reflowing again. Otherwise + // we'll start reflowing if the current line is broken or whitespace is + // compressed. + bool TryReflow = Reflow; + // Break the current token until we can fit the rest of the line. + while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the current token doesn't fit, find the latest possible split in the + // current line so that breaking at it will be under the column limit. + // FIXME: Use the earliest possible split while reflowing to correctly + // compress whitespace within a line. + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); if (Split.first == StringRef::npos) { - // The last line's penalty is handled in addNextStateToQueue(). + // No break opportunity - update the penalty and continue with the next + // logical line. if (LineIndex < EndIndex - 1) + // The last line's penalty is handled in addNextStateToQueue(). Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); + (ContentStartColumn + RemainingTokenColumns - ColumnLimit); + DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); - // Check if compressing the whitespace range will bring the line length - // under the limit. If that is the case, we perform whitespace compression - // instead of inserting a line break. - unsigned RemainingTokenColumnsAfterCompression = - Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); - if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { - RemainingTokenColumns = RemainingTokenColumnsAfterCompression; - ReflowInProgress = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - break; + if (Token->supportsReflow()) { + // Check whether the next natural split point after the current one can + // still fit the line, either because we can compress away whitespace, + // or because the penalty the excess characters introduce is lower than + // the break penalty. + // We only do this for tokens that support reflowing, and thus allow us + // to change the whitespace arbitrarily (e.g. comments). + // Other tokens, like string literals, can be broken on arbitrary + // positions. + + // First, compute the columns from TailOffset to the next possible split + // position. + // For example: + // ColumnLimit: | + // // Some text that breaks + // ^ tail offset + // ^-- split + // ^-------- to split columns + // ^--- next split + // ^--------------- to next split columns + unsigned ToSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, Split.first, ContentStartColumn); + DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n"); + + BreakableToken::Split NextSplit = Token->getSplit( + LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, + ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex); + // Compute the columns necessary to fit the next non-breakable sequence + // into the current line. + unsigned ToNextSplitColumns = 0; + if (NextSplit.first == StringRef::npos) { + ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset, + ContentStartColumn); + } else { + ToNextSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, + Split.first + Split.second + NextSplit.first, ContentStartColumn); + } + // Compress the whitespace between the break and the start of the next + // unbreakable sequence. + ToNextSplitColumns = + Token->getLengthAfterCompression(ToNextSplitColumns, Split); + DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn + << "\n"); + DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n"); + // If the whitespace compression makes us fit, continue on the current + // line. + bool ContinueOnLine = + ContentStartColumn + ToNextSplitColumns <= ColumnLimit; + unsigned ExcessCharactersPenalty = 0; + if (!ContinueOnLine && !Strict) { + // Similarly, if the excess characters' penalty is lower than the + // penalty of introducing a new break, continue on the current line. + ExcessCharactersPenalty = + (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + DEBUG(llvm::dbgs() + << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << NewBreakPenalty << "\n"); + if (ExcessCharactersPenalty < NewBreakPenalty) { + Exceeded = true; + ContinueOnLine = true; + } + } + if (ContinueOnLine) { + DEBUG(llvm::dbgs() << " Continuing on line...\n"); + // The current line fits after compressing the whitespace - reflow + // the next line into it if possible. + TryReflow = true; + if (!DryRun) + Token->compressWhitespace(LineIndex, TailOffset, Split, + Whitespaces); + // When we continue on the same line, leave one space between content. + ContentStartColumn += ToSplitColumns + 1; + Penalty += ExcessCharactersPenalty; + TailOffset += Split.first + Split.second; + RemainingTokenColumns = Token->getRemainingLength( + LineIndex, TailOffset, ContentStartColumn); + continue; + } } - - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + DEBUG(llvm::dbgs() << " Breaking...\n"); + ContentStartColumn = + Token->getContentStartColumn(LineIndex, /*Break=*/true); + unsigned NewRemainingTokenColumns = Token->getRemainingLength( + LineIndex, TailOffset + Split.first + Split.second, + ContentStartColumn); // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) + if (NewRemainingTokenColumns == RemainingTokenColumns) { + // FIXME: Do we need to adjust the penalty? break; - + } assert(NewRemainingTokenColumns < RemainingTokenColumns); + + DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first + << ", " << Split.second << "\n"); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += Current.SplitPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) { - Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - } + + Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; - ReflowInProgress = true; BreakInserted = true; + NewBreakBefore = true; } + // In case there's another line, prepare the state for the start of the next + // line. + if (LineIndex + 1 != EndIndex) { + unsigned NextLineIndex = LineIndex + 1; + if (NewBreakBefore) + // After breaking a line, try to reflow the next line into the current + // one once RemainingTokenColumns fits. + TryReflow = true; + if (TryReflow) { + // We decided that we want to try reflowing the next line into the + // current one. + // We will now adjust the state as if the reflow is successful (in + // preparation for the next line), and see whether that works. If we + // decide that we cannot reflow, we will later reset the state to the + // start of the next line. + Reflow = false; + // As we did not continue breaking the line, RemainingTokenColumns is + // known to fit after ContentStartColumn. Adapt ContentStartColumn to + // the position at which we want to format the next line if we do + // actually reflow. + // When we reflow, we need to add a space between the end of the current + // line and the next line's start column. + ContentStartColumn += RemainingTokenColumns + 1; + // Get the split that we need to reflow next logical line into the end + // of the current one; the split will include any leading whitespace of + // the next logical line. + BreakableToken::Split SplitBeforeNext = + Token->getReflowSplit(NextLineIndex, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn + << "\n Potential reflow split: "); + if (SplitBeforeNext.first != StringRef::npos) { + DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", " + << SplitBeforeNext.second << "\n"); + TailOffset = SplitBeforeNext.first + SplitBeforeNext.second; + // If the rest of the next line fits into the current line below the + // column limit, we can safely reflow. + RemainingTokenColumns = Token->getRemainingLength( + NextLineIndex, TailOffset, ContentStartColumn); + Reflow = true; + if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit after reflow, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the whole next line does not fit, try to find a point in + // the next line at which we can break so that attaching the part + // of the next line to that break point onto the current line is + // below the column limit. + BreakableToken::Split Split = + Token->getSplit(NextLineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); + if (Split.first == StringRef::npos) { + DEBUG(llvm::dbgs() << " Did not find later break\n"); + Reflow = false; + } else { + // Check whether the first split point gets us below the column + // limit. Note that we will execute this split below as part of + // the normal token breaking and reflow logic within the line. + unsigned ToSplitColumns = Token->getRangeLength( + NextLineIndex, TailOffset, Split.first, ContentStartColumn); + if (ContentStartColumn + ToSplitColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Next split protrudes, need: " + << (ContentStartColumn + ToSplitColumns) + << ", space: " << ColumnLimit); + unsigned ExcessCharactersPenalty = + (ContentStartColumn + ToSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + if (NewBreakPenalty < ExcessCharactersPenalty) { + Reflow = false; + } + } + } + } + } else { + DEBUG(llvm::dbgs() << "not found.\n"); + } + } + if (!Reflow) { + // If we didn't reflow into the next line, the only space to consider is + // the next logical line. Reset our state to match the start of the next + // line. + TailOffset = 0; + ContentStartColumn = + Token->getContentStartColumn(NextLineIndex, /*Break=*/false); + RemainingTokenColumns = Token->getRemainingLength( + NextLineIndex, TailOffset, ContentStartColumn); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->adaptStartOfLine(NextLineIndex, Whitespaces); + } else { + // If we found a reflow split and have added a new break before the next + // line, we are going to remove the line break at the start of the next + // logical line. For example, here we'll add a new line break after + // 'text', and subsequently delete the line break between 'that' and + // 'reflows'. + // // some text that + // // reflows + // -> + // // some text + // // that reflows + // When adding the line break, we also added the penalty for it, so we + // need to subtract that penalty again when we remove the line break due + // to reflowing. + if (NewBreakBefore) { + assert(Penalty >= NewBreakPenalty); + Penalty -= NewBreakPenalty; + } + if (!DryRun) + Token->reflow(NextLineIndex, Whitespaces); + } + } + } + + BreakableToken::Split SplitAfterLastLine = + Token->getSplitAfterLastLine(TailOffset); + if (SplitAfterLastLine.first != StringRef::npos) { + DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); + if (!DryRun) + Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, + Whitespaces); + ContentStartColumn = + Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true); + RemainingTokenColumns = Token->getRemainingLength( + Token->getLineCount() - 1, + TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second, + ContentStartColumn); } - State.Column = RemainingTokenColumns; + State.Column = ContentStartColumn + RemainingTokenColumns - + Current.UnbreakableTailLength; if (BreakInserted) { // If we break the token inside a parameter list, we need to break before @@ -1390,15 +1845,15 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, State.Stack[i].BreakBeforeParameter = true; } - Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString - : Style.PenaltyBreakComment; + if (Current.is(TT_BlockComment)) + State.NoContinuation = true; State.Stack.back().LastSpace = StartColumn; } Token->updateNextToken(State); - return Penalty; + return {Penalty, Exceeded}; } unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { |