summaryrefslogtreecommitdiff
path: root/lib/Format
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format')
-rw-r--r--lib/Format/BreakableToken.cpp28
-rw-r--r--lib/Format/ContinuationIndenter.cpp6
-rw-r--r--lib/Format/Format.cpp3
-rw-r--r--lib/Format/TokenAnnotator.cpp64
-rw-r--r--lib/Format/UnwrappedLineFormatter.cpp3
-rw-r--r--lib/Format/UnwrappedLineParser.cpp65
6 files changed, 112 insertions, 57 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index c97486e4e4a79..3c9df62f80dca 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -41,7 +41,8 @@ static bool IsBlank(char C) {
}
static StringRef getLineCommentIndentPrefix(StringRef Comment) {
- static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+ static const char *const KnownPrefixes[] = {
+ "///<", "//!<", "///", "//", "//!"};
StringRef LongestPrefix;
for (StringRef KnownPrefix : KnownPrefixes) {
if (Comment.startswith(KnownPrefix)) {
@@ -77,6 +78,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
}
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
+
+ // Do not split before a number followed by a dot: this would be interpreted
+ // as a numbered list, which would prevent re-flowing in subsequent passes.
+ static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
+ if (SpaceOffset != StringRef::npos &&
+ kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks)))
+ SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
+
if (SpaceOffset == StringRef::npos ||
// Don't break at leading whitespace.
Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
@@ -298,8 +307,9 @@ const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
static bool mayReflowContent(StringRef Content) {
Content = Content.trim(Blanks);
// Lines starting with '@' commonly have special meaning.
- static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
- "@", "TODO", "FIXME", "XXX"};
+ // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
+ static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
+ "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
bool hasSpecialMeaningPrefix = false;
for (StringRef Prefix : kSpecialMeaningPrefixes) {
if (Content.startswith(Prefix)) {
@@ -307,6 +317,14 @@ static bool mayReflowContent(StringRef Content) {
break;
}
}
+
+ // Numbered lists may also start with a number followed by '.'
+ // To avoid issues if a line starts with a number which is actually the end
+ // of a previous line, we only consider numbers with up to 2 digits.
+ static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
+ hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
+ kNumberedListRegexp.match(Content);
+
// Simple heuristic for what to reflow: content should contain at least two
// characters and either the first or second character must be
// non-punctuation.
@@ -692,6 +710,10 @@ BreakableLineCommentSection::BreakableLineCommentSection(
Prefix[i] = "/// ";
else if (Prefix[i] == "//!")
Prefix[i] = "//! ";
+ else if (Prefix[i] == "///<")
+ Prefix[i] = "///< ";
+ else if (Prefix[i] == "//!<")
+ Prefix[i] = "//!< ";
}
Tokens[i] = LineTok;
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 488f9dd582f9e..006a9710148fe 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -207,7 +207,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
// ...
// }.bind(...));
// FIXME: We should find a more generic solution to this problem.
- !(State.Column <= NewLineColumn &&
+ !(State.Column <= NewLineColumn && Previous.isNot(tok::r_paren) &&
Style.Language == FormatStyle::LK_JavaScript))
return true;
@@ -587,8 +587,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
if (!DryRun) {
unsigned Newlines = std::max(
1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+ bool ContinuePPDirective =
+ State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
- State.Line->InPPDirective);
+ ContinuePPDirective);
}
if (!Current.isTrailingComment())
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index c8677e805179c..ac83379e6b15d 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -343,6 +343,8 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
IO.mapOptional("ObjCSpaceBeforeProtocolList",
Style.ObjCSpaceBeforeProtocolList);
+ IO.mapOptional("PenaltyBreakAssignment",
+ Style.PenaltyBreakAssignment);
IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
Style.PenaltyBreakBeforeFirstCallParameter);
IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
@@ -582,6 +584,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpaceBeforeAssignmentOperators = true;
LLVMStyle.SpacesInAngles = false;
+ LLVMStyle.PenaltyBreakAssignment = prec::Assignment;
LLVMStyle.PenaltyBreakComment = 300;
LLVMStyle.PenaltyBreakFirstLessLess = 120;
LLVMStyle.PenaltyBreakString = 1000;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 387768a6ee56a..79f438eb0f88f 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -576,12 +576,13 @@ private:
}
break;
case tok::kw_for:
- if (Style.Language == FormatStyle::LK_JavaScript)
+ if (Style.Language == FormatStyle::LK_JavaScript) {
if (Tok->Previous && Tok->Previous->is(tok::period))
break;
- // JS' for async ( ...
- if (CurrentToken->is(Keywords.kw_async))
+ // JS' for await ( ...
+ if (CurrentToken && CurrentToken->is(Keywords.kw_await))
next();
+ }
Contexts.back().ColonIsForRangeExpr = true;
next();
if (!parseParens())
@@ -703,9 +704,12 @@ private:
void parseIncludeDirective() {
if (CurrentToken && CurrentToken->is(tok::less)) {
- next();
- while (CurrentToken) {
- if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
+ next();
+ while (CurrentToken) {
+ // Mark tokens up to the trailing line comments as implicit string
+ // literals.
+ if (CurrentToken->isNot(tok::comment) &&
+ !CurrentToken->TokenText.startswith("//"))
CurrentToken->Type = TT_ImplicitStringLiteral;
next();
}
@@ -2089,9 +2093,10 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(TT_ConditionalExpr))
return prec::Conditional;
prec::Level Level = Left.getPrecedence();
- if (Level != prec::Unknown)
- return Level;
- Level = Right.getPrecedence();
+ if (Level == prec::Unknown)
+ Level = Right.getPrecedence();
+ if (Level == prec::Assignment)
+ return Style.PenaltyBreakAssignment;
if (Level != prec::Unknown)
return Level;
@@ -2252,8 +2257,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
- // for async ( ...
- if (Right.is(tok::l_paren) && Left.is(Keywords.kw_async) &&
+ // for await ( ...
+ if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&
Left.Previous && Left.Previous->is(tok::kw_for))
return true;
if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
@@ -2472,22 +2477,25 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
// If the last token before a '}', ']', or ')' is a comma or a trailing
// comment, the intention is to insert a line break after it in order to make
- // shuffling around entries easier.
- const FormatToken *BeforeClosingBrace = nullptr;
- if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
- (Style.Language == FormatStyle::LK_JavaScript &&
- Left.is(tok::l_paren))) &&
- Left.BlockKind != BK_Block && Left.MatchingParen)
- BeforeClosingBrace = Left.MatchingParen->Previous;
- else if (Right.MatchingParen &&
- (Right.MatchingParen->isOneOf(tok::l_brace,
- TT_ArrayInitializerLSquare) ||
- (Style.Language == FormatStyle::LK_JavaScript &&
- Right.MatchingParen->is(tok::l_paren))))
- BeforeClosingBrace = &Left;
- if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
- BeforeClosingBrace->isTrailingComment()))
- return true;
+ // shuffling around entries easier. Import statements, especially in
+ // JavaScript, can be an exception to this rule.
+ if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
+ const FormatToken *BeforeClosingBrace = nullptr;
+ if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ Left.is(tok::l_paren))) &&
+ Left.BlockKind != BK_Block && Left.MatchingParen)
+ BeforeClosingBrace = Left.MatchingParen->Previous;
+ else if (Right.MatchingParen &&
+ (Right.MatchingParen->isOneOf(tok::l_brace,
+ TT_ArrayInitializerLSquare) ||
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ Right.MatchingParen->is(tok::l_paren))))
+ BeforeClosingBrace = &Left;
+ if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
+ BeforeClosingBrace->isTrailingComment()))
+ return true;
+ }
if (Right.is(tok::comment))
return Left.BlockKind != BK_BracedInit &&
@@ -2562,7 +2570,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
Keywords.kw_interface, Keywords.kw_type, tok::kw_static,
tok::kw_public, tok::kw_private, tok::kw_protected,
Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set))
- return false; // Otherwise a semicolon is inserted.
+ return false; // Otherwise automatic semicolon insertion would trigger.
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
return false;
if (Left.is(TT_JsTypeColon))
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index 8ff893426e255..01504da0a29ba 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -611,7 +611,8 @@ public:
LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
while (State.NextToken) {
formatChildren(State, /*Newline=*/false, DryRun, Penalty);
- Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+ Indenter->addTokenToState(
+ State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
}
return Penalty;
}
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 31c66ffb00a12..ae79ea5d8a668 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -55,13 +55,33 @@ private:
std::vector<bool> &Stack;
};
+static bool isLineComment(const FormatToken &FormatTok) {
+ return FormatTok.is(tok::comment) &&
+ FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// \p Previous. The original column of \p MinColumnToken is used to determine
+// whether \p FormatTok is indented enough to the right to continue \p Previous.
+static bool continuesLineComment(const FormatToken &FormatTok,
+ const FormatToken *Previous,
+ const FormatToken *MinColumnToken) {
+ if (!Previous || !MinColumnToken)
+ return false;
+ unsigned MinContinueColumn =
+ MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
+ return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+ isLineComment(*Previous) &&
+ FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
class ScopedMacroState : public FormatTokenSource {
public:
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
FormatToken *&ResetToken)
: Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
- Token(nullptr) {
+ Token(nullptr), PreviousToken(nullptr) {
TokenSource = this;
Line.Level = 0;
Line.InPPDirective = true;
@@ -78,6 +98,7 @@ public:
// The \c UnwrappedLineParser guards against this by never calling
// \c getNextToken() after it has encountered the first eof token.
assert(!eof());
+ PreviousToken = Token;
Token = PreviousTokenSource->getNextToken();
if (eof())
return getFakeEOF();
@@ -87,12 +108,17 @@ public:
unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
FormatToken *setPosition(unsigned Position) override {
+ PreviousToken = nullptr;
Token = PreviousTokenSource->setPosition(Position);
return Token;
}
private:
- bool eof() { return Token && Token->HasUnescapedNewline; }
+ bool eof() {
+ return Token && Token->HasUnescapedNewline &&
+ !continuesLineComment(*Token, PreviousToken,
+ /*MinColumnToken=*/PreviousToken);
+ }
FormatToken *getFakeEOF() {
static bool EOFInitialized = false;
@@ -112,6 +138,7 @@ private:
FormatTokenSource *PreviousTokenSource;
FormatToken *Token;
+ FormatToken *PreviousToken;
};
} // end anonymous namespace
@@ -429,8 +456,9 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
parseParens();
addUnwrappedLine();
- size_t OpeningLineIndex =
- Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
+ size_t OpeningLineIndex = CurrentLines->empty()
+ ? (UnwrappedLine::kInvalidIndex)
+ : (CurrentLines->size() - 1);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
@@ -1635,9 +1663,9 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
"'for', 'while' or foreach macro expected");
nextToken();
- // JS' for async ( ...
+ // JS' for await ( ...
if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->is(Keywords.kw_async))
+ FormatTok->is(Keywords.kw_await))
nextToken();
if (FormatTok->Tok.is(tok::l_paren))
parseParens();
@@ -2091,16 +2119,11 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
FormatTok.NewlinesBefore > 0;
}
-static bool isLineComment(const FormatToken &FormatTok) {
- return FormatTok.is(tok::comment) &&
- FormatTok.TokenText.startswith("//");
-}
-
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
-static bool continuesLineComment(const FormatToken &FormatTok,
- const UnwrappedLine &Line,
- llvm::Regex &CommentPragmasRegex) {
+static bool continuesLineCommentSection(const FormatToken &FormatTok,
+ const UnwrappedLine &Line,
+ llvm::Regex &CommentPragmasRegex) {
if (Line.Tokens.empty())
return false;
@@ -2199,12 +2222,8 @@ static bool continuesLineComment(const FormatToken &FormatTok,
MinColumnToken = PreviousToken;
}
- unsigned MinContinueColumn =
- MinColumnToken->OriginalColumn +
- (isLineComment(*MinColumnToken) ? 0 : 1);
- return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
- isLineComment(*(Line.Tokens.back().Tok)) &&
- FormatTok.OriginalColumn >= MinContinueColumn;
+ return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
+ MinColumnToken);
}
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
@@ -2222,7 +2241,7 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
// FIXME: Consider putting separate line comment sections as children to the
// unwrapped line instead.
(*I)->ContinuesLineCommentSection =
- continuesLineComment(**I, *Line, CommentPragmasRegex);
+ continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
addUnwrappedLine();
pushToken(*I);
@@ -2255,7 +2274,7 @@ void UnwrappedLineParser::distributeComments(
const SmallVectorImpl<FormatToken *> &Comments,
const FormatToken *NextTok) {
// Whether or not a line comment token continues a line is controlled by
- // the method continuesLineComment, with the following caveat:
+ // the method continuesLineCommentSection, with the following caveat:
//
// Define a trail of Comments to be a nonempty proper postfix of Comments such
// that each comment line from the trail is aligned with the next token, if
@@ -2293,7 +2312,7 @@ void UnwrappedLineParser::distributeComments(
FormatTok->ContinuesLineCommentSection = false;
} else {
FormatTok->ContinuesLineCommentSection =
- continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
+ continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
}
if (!FormatTok->ContinuesLineCommentSection &&
(isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {