diff options
Diffstat (limited to 'lib/Format/WhitespaceManager.cpp')
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 274 |
1 files changed, 179 insertions, 95 deletions
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index b64506f39035f..2c1f59324971f 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -25,64 +25,60 @@ operator()(const Change &C1, const Change &C2) const { C2.OriginalWhitespaceRange.getBegin()); } -WhitespaceManager::Change::Change( - bool CreateReplacement, SourceRange OriginalWhitespaceRange, - unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective, - bool IsStartOfDeclName, bool IsInsideToken) - : CreateReplacement(CreateReplacement), +WhitespaceManager::Change::Change(const FormatToken &Tok, + bool CreateReplacement, + SourceRange OriginalWhitespaceRange, + int Spaces, unsigned StartOfTokenColumn, + unsigned NewlinesBefore, + StringRef PreviousLinePostfix, + StringRef CurrentLinePrefix, + bool ContinuesPPDirective, bool IsInsideToken) + : Tok(&Tok), CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), - ContinuesPPDirective(ContinuesPPDirective), - IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel), - Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), - TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + CurrentLinePrefix(CurrentLinePrefix), + ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces), + IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0), + PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned IndentLevel, unsigned Spaces, + unsigned Spaces, unsigned StartOfTokenColumn, bool 
InPPDirective) { if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; - Changes.push_back( - Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel, - Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(), - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, + Spaces, StartOfTokenColumn, Newlines, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { if (Tok.Finalized) return; - Changes.push_back(Change( - /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0, - /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/false)); + Changes.push_back(Change(Tok, /*CreateReplacement=*/false, + Tok.WhitespaceRange, /*Spaces=*/0, + Tok.OriginalColumn, Tok.NewlinesBefore, "", "", + InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/false)); } void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, unsigned IndentLevel, int Spaces) { + unsigned Newlines, int Spaces) { if (Tok.Finalized) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - Changes.push_back(Change( - true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), - IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, - CurrentPrefix, Tok.is(TT_LineComment) ? 
tok::comment : tok::unknown, - InPPDirective && !Tok.IsFirst, - Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName), - /*IsInsideToken=*/Newlines == 0)); + Changes.push_back( + Change(Tok, /*CreateReplacement=*/true, + SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces, + std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix, + InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -125,30 +121,64 @@ void WhitespaceManager::calculateLineBreakInformation() { Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || - (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && - Changes[i - 1].Kind == tok::comment; + (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) || + (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) && + Changes[i - 1].Tok->is(tok::comment) && + // FIXME: This is a dirty hack. The problem is that + // BreakableLineCommentSection does comment reflow changes and here is + // the aligning of trailing comments. Consider the case where we reflow + // the second line up in this example: + // + // // line 1 + // // line 2 + // + // That amounts to 2 changes by BreakableLineCommentSection: + // - the first, delimited by (), for the whitespace between the tokens, + // - and second, delimited by [], for the whitespace at the beginning + // of the second token: + // + // // line 1( + // )[// ]line 2 + // + // So in the end we have two changes like this: + // + // // line1()[ ]line 2 + // + // Note that the OriginalWhitespaceStart of the second change is the + // same as the PreviousOriginalWhitespaceEnd of the first change. 
+ // In this case, the below check ensures that the second change doesn't + // get treated as a trailing comment change here, since this might + // trigger additional whitespace to be wrongly inserted before "line 2" + // by the comment aligner here. + // + // For a proper solution we need a mechanism to say to WhitespaceManager + // that a particular change breaks the current sequence of trailing + // comments. + OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd; } // FIXME: The last token is currently not always an eof token; in those // cases, setting TokenLength of the last token to 0 is wrong. Changes.back().TokenLength = 0; - Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment; + Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment); const WhitespaceManager::Change *LastBlockComment = nullptr; for (auto &Change : Changes) { // Reset the IsTrailingComment flag for changes inside of trailing comments - // so they don't get realigned later. - if (Change.IsInsideToken) + // so they don't get realigned later. Comment line breaks however still need + // to be aligned. 
+ if (Change.IsInsideToken && Change.NewlinesBefore == 0) Change.IsTrailingComment = false; Change.StartOfBlockComment = nullptr; Change.IndentationOffset = 0; - if (Change.Kind == tok::comment) { - LastBlockComment = &Change; - } else if (Change.Kind == tok::unknown) { - if ((Change.StartOfBlockComment = LastBlockComment)) - Change.IndentationOffset = - Change.StartOfTokenColumn - - Change.StartOfBlockComment->StartOfTokenColumn; + if (Change.Tok->is(tok::comment)) { + if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) + LastBlockComment = &Change; + else { + if ((Change.StartOfBlockComment = LastBlockComment)) + Change.IndentationOffset = + Change.StartOfTokenColumn - + Change.StartOfBlockComment->StartOfTokenColumn; + } } else { LastBlockComment = nullptr; } @@ -162,21 +192,56 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, SmallVector<WhitespaceManager::Change, 16> &Changes) { bool FoundMatchOnLine = false; int Shift = 0; + + // ScopeStack keeps track of the current scope depth. It contains indices of + // the first token on each scope. + // We only run the "Matches" function on tokens from the outer-most scope. + // However, we do need to pay special attention to one class of tokens + // that are not in the outer-most scope, and that is function parameters + // which are split across multiple lines, as illustrated by this example: + // double a(int x); + // int b(int y, + // double z); + // In the above example, we need to take special care to ensure that + // 'double z' is indented along with its owning function 'b'. 
+ SmallVector<unsigned, 16> ScopeStack; + for (unsigned i = Start; i != End; ++i) { - if (Changes[i].NewlinesBefore > 0) { - FoundMatchOnLine = false; + if (ScopeStack.size() != 0 && + Changes[i].nestingAndIndentLevel() < + Changes[ScopeStack.back()].nestingAndIndentLevel()) + ScopeStack.pop_back(); + + if (i != Start && Changes[i].nestingAndIndentLevel() > + Changes[i - 1].nestingAndIndentLevel()) + ScopeStack.push_back(i); + + bool InsideNestedScope = ScopeStack.size() != 0; + + if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { Shift = 0; + FoundMatchOnLine = false; } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are // shifted by the same amount - if (!FoundMatchOnLine && Matches(Changes[i])) { + if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { FoundMatchOnLine = true; Shift = Column - Changes[i].StartOfTokenColumn; Changes[i].Spaces += Shift; } + // This is for function parameters that are split across multiple lines, + // as mentioned in the ScopeStack comment. + if (InsideNestedScope && Changes[i].NewlinesBefore > 0) { + unsigned ScopeStart = ScopeStack.back(); + if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || + (ScopeStart > Start + 1 && + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) + Changes[i].Spaces += Shift; + } + assert(Shift >= 0); Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) @@ -184,15 +249,37 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, } } -// Walk through all of the changes and find sequences of matching tokens to -// align. To do so, keep track of the lines and whether or not a matching token -// was found on a line. If a matching token is found, extend the current -// sequence. If the current line cannot be part of a sequence, e.g. 
because -// there is an empty line before it or it contains only non-matching tokens, -// finalize the previous sequence. +// Walk through a subset of the changes, starting at StartAt, and find +// sequences of matching tokens to align. To do so, keep track of the lines and +// whether or not a matching token was found on a line. If a matching token is +// found, extend the current sequence. If the current line cannot be part of a +// sequence, e.g. because there is an empty line before it or it contains only +// non-matching tokens, finalize the previous sequence. +// The value returned is the token on which we stopped, either because we +// exhausted all items inside Changes, or because we hit a scope level higher +// than our initial scope. +// This function is recursive. Each invocation processes only the scope level +// equal to the initial level, which is the level of Changes[StartAt]. +// If we encounter a scope level greater than the initial level, then we call +// ourselves recursively, thereby avoiding the pollution of the current state +// with the alignment requirements of the nested sub-level. This recursive +// behavior is necessary for aligning function prototypes that have one or more +// arguments. +// If this function encounters a scope level less than the initial level, +// it returns the current position. +// There is a non-obvious subtlety in the recursive behavior: Even though we +// defer processing of nested levels to recursive invocations of this +// function, when it comes time to align a sequence of tokens, we run the +// alignment on the entire sequence, including the nested levels. +// When doing so, most of the nested tokens are skipped, because their +// alignment was already handled by the recursive invocations of this function. +// However, the special exception is that we do NOT skip function parameters +// that are split across multiple lines. 
See the test case in FormatTest.cpp +// that mentions "split function parameter alignment" for an example of this. template <typename F> -static void AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes) { +static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector<WhitespaceManager::Change, 16> &Changes, + unsigned StartAt) { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -200,14 +287,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - // Keep track of the nesting level of matching tokens, i.e. the number of - // surrounding (), [], or {}. We will only align a sequence of matching - // token that share the same scope depth. - // - // FIXME: This could use FormatToken::NestingLevel information, but there is - // an outstanding issue wrt the brace scopes. - unsigned NestingLevelOfLastMatch = 0; - unsigned NestingLevel = 0; + // Measure the scope level (i.e. depth of (), [], {}) of the first token, and + // abort when we hit any token in a higher scope than the starting one. + auto NestingAndIndentLevel = StartAt < Changes.size() + ? 
Changes[StartAt].nestingAndIndentLevel() + : std::pair<unsigned, unsigned>(0, 0); // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -235,7 +319,11 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, EndOfSequence = 0; }; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned i = StartAt; + for (unsigned e = Changes.size(); i != e; ++i) { + if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel) + break; + if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; @@ -247,33 +335,24 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, FoundMatchOnLine = false; } - if (Changes[i].Kind == tok::comma) { + if (Changes[i].Tok->is(tok::comma)) { ++CommasBeforeMatch; - } else if (Changes[i].Kind == tok::r_brace || - Changes[i].Kind == tok::r_paren || - Changes[i].Kind == tok::r_square) { - --NestingLevel; - } else if (Changes[i].Kind == tok::l_brace || - Changes[i].Kind == tok::l_paren || - Changes[i].Kind == tok::l_square) { - // We want sequences to skip over child scopes if possible, but not the - // other way around. - NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); - ++NestingLevel; + } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) { + // Call AlignTokens recursively, skipping over this scope block. + unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); + i = StoppedAt - 1; + continue; } if (!Matches(Changes[i])) continue; // If there is more than one matching token per line, or if the number of - // preceding commas, or the scope depth, do not match anymore, end the - // sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || - NestingLevel != NestingLevelOfLastMatch) + // preceding commas, do not match anymore, end the sequence. 
+ if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) AlignCurrentSequence(); CommasBeforeLastMatch = CommasBeforeMatch; - NestingLevelOfLastMatch = NestingLevel; FoundMatchOnLine = true; if (StartOfSequence == 0) @@ -296,8 +375,9 @@ static void AlignTokens(const FormatStyle &Style, F &&Matches, MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } - EndOfSequence = Changes.size(); + EndOfSequence = i; AlignCurrentSequence(); + return i; } void WhitespaceManager::alignConsecutiveAssignments() { @@ -314,9 +394,9 @@ void WhitespaceManager::alignConsecutiveAssignments() { if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) return false; - return C.Kind == tok::equal; + return C.Tok->is(tok::equal); }, - Changes); + Changes, /*StartAt=*/0); } void WhitespaceManager::alignConsecutiveDeclarations() { @@ -329,9 +409,15 @@ void WhitespaceManager::alignConsecutiveDeclarations() { // const char* const* v1; // float const* v2; // SomeVeryLongType const& v3; - - AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; }, - Changes); + AlignTokens(Style, + [](Change const &C) { + // tok::kw_operator is necessary for aligning operator overload + // definitions. + return C.Tok->is(TT_StartOfName) || + C.Tok->is(TT_FunctionDeclarationName) || + C.Tok->is(tok::kw_operator); + }, + Changes, /*StartAt=*/0); } void WhitespaceManager::alignTrailingComments() { @@ -360,17 +446,14 @@ void WhitespaceManager::alignTrailingComments() { // If this comment follows an } in column 0, it probably documents the // closing of a namespace and we don't want to align it. bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && - Changes[i - 1].Kind == tok::r_brace && + Changes[i - 1].Tok->is(tok::r_brace) && Changes[i - 1].StartOfTokenColumn == 0; bool WasAlignedWithStartOfNextLine = false; if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. 
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( Changes[i].OriginalWhitespaceRange.getEnd()); for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Kind == tok::comment || - Changes[j].Kind == tok::unknown) - // Skip over comments and unknown tokens. "unknown tokens are used for - // the continuation of multiline comments. + if (Changes[j].Tok->is(tok::comment)) continue; unsigned NextColumn = SourceMgr.getSpellingColumnNumber( @@ -481,7 +564,8 @@ void WhitespaceManager::generateChanges() { C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces), + appendIndentText(ReplacementText, C.Tok->IndentLevel, + std::max(0, C.Spaces), C.StartOfTokenColumn - std::max(0, C.Spaces)); ReplacementText.append(C.CurrentLinePrefix); storeReplacement(C.OriginalWhitespaceRange, ReplacementText); |