diff options
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 28 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 6 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 3 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 64 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 3 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 65 |
6 files changed, 112 insertions, 57 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index c97486e4e4a79..3c9df62f80dca 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -41,7 +41,8 @@ static bool IsBlank(char C) { } static StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = {"///", "//", "//!"}; + static const char *const KnownPrefixes[] = { + "///<", "//!<", "///", "//", "//!"}; StringRef LongestPrefix; for (StringRef KnownPrefix : KnownPrefixes) { if (Comment.startswith(KnownPrefix)) { @@ -77,6 +78,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text, } StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); + + // Do not split before a number followed by a dot: this would be interpreted + // as a numbered list, which would prevent re-flowing in subsequent passes. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); + if (SpaceOffset != StringRef::npos && + kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) + SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); + if (SpaceOffset == StringRef::npos || // Don't break at leading whitespace. Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { @@ -298,8 +307,9 @@ const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. - static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX"}; + // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. + static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = { + "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; bool hasSpecialMeaningPrefix = false; for (StringRef Prefix : kSpecialMeaningPrefixes) { if (Content.startswith(Prefix)) { @@ -307,6 +317,14 @@ static bool mayReflowContent(StringRef Content) { break; } } + + // Numbered lists may also start with a number followed by '.' + // To avoid issues if a line starts with a number which is actually the end + // of a previous line, we only consider numbers with up to 2 digits. + static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); + hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || + kNumberedListRegexp.match(Content); + // Simple heuristic for what to reflow: content should contain at least two // characters and either the first or second character must be // non-punctuation. @@ -692,6 +710,10 @@ BreakableLineCommentSection::BreakableLineCommentSection( Prefix[i] = "/// "; else if (Prefix[i] == "//!") Prefix[i] = "//! "; + else if (Prefix[i] == "///<") + Prefix[i] = "///< "; + else if (Prefix[i] == "//!<") + Prefix[i] = "//!< "; } Tokens[i] = LineTok; diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 488f9dd582f9e..006a9710148fe 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -207,7 +207,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // ... // }.bind(...)); // FIXME: We should find a more generic solution to this problem. - !(State.Column <= NewLineColumn && + !(State.Column <= NewLineColumn && Previous.isNot(tok::r_paren) && Style.Language == FormatStyle::LK_JavaScript)) return true; @@ -587,8 +587,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, if (!DryRun) { unsigned Newlines = std::max( 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); + bool ContinuePPDirective = + State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, - State.Line->InPPDirective); + ContinuePPDirective); } if (!Current.isTrailingComment()) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index c8677e805179c..ac83379e6b15d 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -343,6 +343,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", Style.ObjCSpaceBeforeProtocolList); + IO.mapOptional("PenaltyBreakAssignment", + Style.PenaltyBreakAssignment); IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", Style.PenaltyBreakBeforeFirstCallParameter); IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); @@ -582,6 +584,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpacesInAngles = false; + LLVMStyle.PenaltyBreakAssignment = prec::Assignment; LLVMStyle.PenaltyBreakComment = 300; LLVMStyle.PenaltyBreakFirstLessLess = 120; LLVMStyle.PenaltyBreakString = 1000; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 387768a6ee56a..79f438eb0f88f 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -576,12 +576,13 @@ private: } break; case tok::kw_for: - if (Style.Language == FormatStyle::LK_JavaScript) + if (Style.Language == FormatStyle::LK_JavaScript) { if (Tok->Previous && Tok->Previous->is(tok::period)) break; - // JS' for async ( ... - if (CurrentToken->is(Keywords.kw_async)) + // JS' for await ( ... + if (CurrentToken && CurrentToken->is(Keywords.kw_await)) next(); + } Contexts.back().ColonIsForRangeExpr = true; next(); if (!parseParens()) @@ -703,9 +704,12 @@ private: void parseIncludeDirective() { if (CurrentToken && CurrentToken->is(tok::less)) { - next(); - while (CurrentToken) { - if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) + next(); + while (CurrentToken) { + // Mark tokens up to the trailing line comments as implicit string + // literals. + if (CurrentToken->isNot(tok::comment) && + !CurrentToken->TokenText.startswith("//")) CurrentToken->Type = TT_ImplicitStringLiteral; next(); } @@ -2089,9 +2093,10 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); - if (Level != prec::Unknown) - return Level; - Level = Right.getPrecedence(); + if (Level == prec::Unknown) + Level = Right.getPrecedence(); + if (Level == prec::Assignment) + return Style.PenaltyBreakAssignment; if (Level != prec::Unknown) return Level; @@ -2252,8 +2257,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; - // for async ( ... - if (Right.is(tok::l_paren) && Left.is(Keywords.kw_async) && + // for await ( ... + if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && Left.Previous->is(tok::kw_for)) return true; if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && @@ -2472,22 +2477,25 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, // If the last token before a '}', ']', or ')' is a comma or a trailing // comment, the intention is to insert a line break after it in order to make - // shuffling around entries easier. - const FormatToken *BeforeClosingBrace = nullptr; - if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_JavaScript && - Left.is(tok::l_paren))) && - Left.BlockKind != BK_Block && Left.MatchingParen) - BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.MatchingParen && - (Right.MatchingParen->isOneOf(tok::l_brace, - TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_JavaScript && - Right.MatchingParen->is(tok::l_paren)))) - BeforeClosingBrace = &Left; - if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || - BeforeClosingBrace->isTrailingComment())) - return true; + // shuffling around entries easier. Import statements, especially in + // JavaScript, can be an exception to this rule. + if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) { + const FormatToken *BeforeClosingBrace = nullptr; + if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || + (Style.Language == FormatStyle::LK_JavaScript && + Left.is(tok::l_paren))) && + Left.BlockKind != BK_Block && Left.MatchingParen) + BeforeClosingBrace = Left.MatchingParen->Previous; + else if (Right.MatchingParen && + (Right.MatchingParen->isOneOf(tok::l_brace, + TT_ArrayInitializerLSquare) || + (Style.Language == FormatStyle::LK_JavaScript && + Right.MatchingParen->is(tok::l_paren)))) + BeforeClosingBrace = &Left; + if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || + BeforeClosingBrace->isTrailingComment())) + return true; + } if (Right.is(tok::comment)) return Left.BlockKind != BK_BracedInit && @@ -2562,7 +2570,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Keywords.kw_interface, Keywords.kw_type, tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set)) - return false; // Otherwise a semicolon is inserted. + return false; // Otherwise automatic semicolon insertion would trigger. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index 8ff893426e255..01504da0a29ba 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -611,7 +611,8 @@ public: LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); - Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + Indenter->addTokenToState( + State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun); } return Penalty; } diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 31c66ffb00a12..ae79ea5d8a668 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -55,13 +55,33 @@ private: std::vector<bool> &Stack; }; +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && + FormatTok.TokenText.startswith("//"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// \p Previous. The original column of \p MinColumnToken is used to determine +// whether \p FormatTok is indented enough to the right to continue \p Previous. +static bool continuesLineComment(const FormatToken &FormatTok, + const FormatToken *Previous, + const FormatToken *MinColumnToken) { + if (!Previous || !MinColumnToken) + return false; + unsigned MinContinueColumn = + MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*Previous) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, FormatToken *&ResetToken) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), - Token(nullptr) { + Token(nullptr), PreviousToken(nullptr) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -78,6 +98,7 @@ public: // The \c UnwrappedLineParser guards against this by never calling // \c getNextToken() after it has encountered the first eof token. assert(!eof()); + PreviousToken = Token; Token = PreviousTokenSource->getNextToken(); if (eof()) return getFakeEOF(); @@ -87,12 +108,17 @@ public: unsigned getPosition() override { return PreviousTokenSource->getPosition(); } FormatToken *setPosition(unsigned Position) override { + PreviousToken = nullptr; Token = PreviousTokenSource->setPosition(Position); return Token; } private: - bool eof() { return Token && Token->HasUnescapedNewline; } + bool eof() { + return Token && Token->HasUnescapedNewline && + !continuesLineComment(*Token, PreviousToken, + /*MinColumnToken=*/PreviousToken); + } FormatToken *getFakeEOF() { static bool EOFInitialized = false; @@ -112,6 +138,7 @@ private: FormatTokenSource *PreviousTokenSource; FormatToken *Token; + FormatToken *PreviousToken; }; } // end anonymous namespace @@ -429,8 +456,9 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, parseParens(); addUnwrappedLine(); - size_t OpeningLineIndex = - Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1); + size_t OpeningLineIndex = CurrentLines->empty() + ? (UnwrappedLine::kInvalidIndex) + : (CurrentLines->size() - 1); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); @@ -1635,9 +1663,9 @@ void UnwrappedLineParser::parseForOrWhileLoop() { assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && "'for', 'while' or foreach macro expected"); nextToken(); - // JS' for async ( ... + // JS' for await ( ... if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->is(Keywords.kw_async)) + FormatTok->is(Keywords.kw_await)) nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); @@ -2091,16 +2119,11 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { FormatTok.NewlinesBefore > 0; } -static bool isLineComment(const FormatToken &FormatTok) { - return FormatTok.is(tok::comment) && - FormatTok.TokenText.startswith("//"); -} - // Checks if \p FormatTok is a line comment that continues the line comment // section on \p Line. -static bool continuesLineComment(const FormatToken &FormatTok, - const UnwrappedLine &Line, - llvm::Regex &CommentPragmasRegex) { +static bool continuesLineCommentSection(const FormatToken &FormatTok, + const UnwrappedLine &Line, + llvm::Regex &CommentPragmasRegex) { if (Line.Tokens.empty()) return false; @@ -2199,12 +2222,8 @@ static bool continuesLineComment(const FormatToken &FormatTok, MinColumnToken = PreviousToken; } - unsigned MinContinueColumn = - MinColumnToken->OriginalColumn + - (isLineComment(*MinColumnToken) ? 0 : 1); - return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && - isLineComment(*(Line.Tokens.back().Tok)) && - FormatTok.OriginalColumn >= MinContinueColumn; + return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, + MinColumnToken); } void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { @@ -2222,7 +2241,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { // FIXME: Consider putting separate line comment sections as children to the // unwrapped line instead. (*I)->ContinuesLineCommentSection = - continuesLineComment(**I, *Line, CommentPragmasRegex); + continuesLineCommentSection(**I, *Line, CommentPragmasRegex); if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(*I); @@ -2255,7 +2274,7 @@ void UnwrappedLineParser::distributeComments( const SmallVectorImpl<FormatToken *> &Comments, const FormatToken *NextTok) { // Whether or not a line comment token continues a line is controlled by - // the method continuesLineComment, with the following caveat: + // the method continuesLineCommentSection, with the following caveat: // // Define a trail of Comments to be a nonempty proper postfix of Comments such // that each comment line from the trail is aligned with the next token, if @@ -2293,7 +2312,7 @@ void UnwrappedLineParser::distributeComments( FormatTok->ContinuesLineCommentSection = false; } else { FormatTok->ContinuesLineCommentSection = - continuesLineComment(*FormatTok, *Line, CommentPragmasRegex); + continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); } if (!FormatTok->ContinuesLineCommentSection && (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { |