summary | refs | log | tree | commit | diff
path: root/lib/Format/UnwrappedLineParser.cpp
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-04-16 16:02:28 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-04-16 16:02:28 +0000
commit: 7442d6faa2719e4e7d33a7021c406c5a4facd74d (patch)
tree: c72b9241553fc9966179aba84f90f17bfa9235c3 /lib/Format/UnwrappedLineParser.cpp
parent: b52119637f743680a99710ce5fdb6646da2772af (diff)
Notes
Diffstat (limited to 'lib/Format/UnwrappedLineParser.cpp')
-rw-r--r-- lib/Format/UnwrappedLineParser.cpp 286
1 files changed, 248 insertions, 38 deletions
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 8fc3b78aee010..5be68ad5c6b82 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -202,7 +202,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
- CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+ CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+ CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
void UnwrappedLineParser::reset() {
@@ -334,8 +335,11 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
case tok::l_brace:
if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
PrevTok->is(tok::colon))
- // In TypeScript's TypeMemberLists, there can be semicolons between the
- // individual members.
+ // A colon indicates this code is in a type, or a braced list following
+ // a label in an object literal ({a: {b: 1}}).
+ // The code below could be confused by semicolons between the individual
+ // members in a type member list, which would normally trigger BK_Block.
+ // In both cases, this must be parsed as an inline braced init.
Tok->BlockKind = BK_BracedInit;
else
Tok->BlockKind = BK_Unknown;
@@ -424,6 +428,8 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
parseParens();
addUnwrappedLine();
+ size_t OpeningLineIndex =
+ Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
@@ -449,6 +455,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
if (MunchSemi && FormatTok->Tok.is(tok::semi))
nextToken();
Line->Level = InitialLevel;
+ Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
}
static bool isGoogScope(const UnwrappedLine &Line) {
@@ -582,13 +589,14 @@ void UnwrappedLineParser::conditionalCompilationEnd() {
}
void UnwrappedLineParser::parsePPIf(bool IfDef) {
+ bool IfNDef = FormatTok->is(tok::pp_ifndef); // Sampled before nextToken() moves past the directive keyword.
nextToken();
- bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
- FormatTok->Tok.getLiteralData() != nullptr &&
- StringRef(FormatTok->Tok.getLiteralData(),
- FormatTok->Tok.getLength()) == "0") ||
- FormatTok->Tok.is(tok::kw_false);
- conditionalCompilationStart(!IfDef && IsLiteralFalse);
+ bool Unreachable = false; // Set when the token after the directive marks the branch as dead code.
+ if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) // IfDef false: condition token is `false` or the text 0.
+ Unreachable = true;
+ if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") // IfDef true and not ifndef: the tested name is exactly SWIG.
+ Unreachable = true;
+ conditionalCompilationStart(Unreachable); // Opens the conditional section, passing along whether it is unreachable.
parsePPUnknown();
}
@@ -746,8 +754,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
return addUnwrappedLine();
- if ((PreviousMustBeValue || Previous->is(tok::r_brace)) &&
- isJSDeclOrStmt(Keywords, Next))
+ if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}
@@ -909,7 +916,8 @@ void UnwrappedLineParser::parseStructuralElement() {
return;
}
}
- if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
+ if (Style.isCpp() &&
+ FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
Keywords.kw_slots, Keywords.kw_qslots)) {
nextToken();
if (FormatTok->is(tok::colon)) {
@@ -943,7 +951,7 @@ void UnwrappedLineParser::parseStructuralElement() {
if (!parseEnum())
break;
// This only applies for C++.
- if (Style.Language != FormatStyle::LK_Cpp) {
+ if (!Style.isCpp()) {
addUnwrappedLine();
return;
}
@@ -1124,7 +1132,7 @@ void UnwrappedLineParser::parseStructuralElement() {
}
bool UnwrappedLineParser::tryToParseLambda() {
- if (Style.Language != FormatStyle::LK_Cpp) {
+ if (!Style.isCpp()) {
nextToken();
return false;
}
@@ -1298,6 +1306,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
continue;
}
}
+ if (FormatTok->is(tok::l_brace)) {
+ // Could be a method inside of a braced list `{a() { return 1; }}`.
+ if (tryToParseBracedList())
+ continue;
+ parseChildBlock();
+ }
}
switch (FormatTok->Tok.getKind()) {
case tok::caret:
@@ -1309,12 +1323,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
case tok::l_square:
tryToParseLambda();
break;
- case tok::l_brace:
- // Assume there are no blocks inside a braced init list apart
- // from the ones we explicitly parse out (like lambdas).
- FormatTok->BlockKind = BK_BracedInit;
- parseBracedList();
- break;
case tok::l_paren:
parseParens();
// JavaScript can just have free standing methods and getters/setters in
@@ -1325,6 +1333,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
break;
}
break;
+ case tok::l_brace:
+ // Assume there are no blocks inside a braced init list apart
+ // from the ones we explicitly parse out (like lambdas).
+ FormatTok->BlockKind = BK_BracedInit;
+ parseBracedList();
+ break;
case tok::r_brace:
nextToken();
return !HasError;
@@ -1381,6 +1395,12 @@ void UnwrappedLineParser::parseParens() {
if (FormatTok->Tok.is(tok::l_brace))
parseBracedList();
break;
+ case tok::kw_class:
+ if (Style.Language == FormatStyle::LK_JavaScript)
+ parseRecord(/*ParseAsExpr=*/true);
+ else
+ nextToken();
+ break;
case tok::identifier:
if (Style.Language == FormatStyle::LK_JavaScript &&
(FormatTok->is(Keywords.kw_function) ||
@@ -1722,8 +1742,7 @@ bool UnwrappedLineParser::parseEnum() {
nextToken();
// If there are two identifiers in a row, this is likely an elaborate
// return type. In Java, this can be "implements", etc.
- if (Style.Language == FormatStyle::LK_Cpp &&
- FormatTok->is(tok::identifier))
+ if (Style.isCpp() && FormatTok->is(tok::identifier))
return false;
}
}
@@ -1819,7 +1838,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
addUnwrappedLine();
}
-void UnwrappedLineParser::parseRecord() {
+void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
const FormatToken &InitialToken = *FormatTok;
nextToken();
@@ -1863,11 +1882,15 @@ void UnwrappedLineParser::parseRecord() {
}
}
if (FormatTok->Tok.is(tok::l_brace)) {
- if (ShouldBreakBeforeBrace(Style, InitialToken))
- addUnwrappedLine();
+ if (ParseAsExpr) {
+ parseChildBlock();
+ } else {
+ if (ShouldBreakBeforeBrace(Style, InitialToken))
+ addUnwrappedLine();
- parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
- /*MunchSemi=*/false);
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
+ /*MunchSemi=*/false);
+ }
}
// There is no addUnwrappedLine() here so that we fall through to parsing a
// structural element afterwards. Thus, in "class A {} n, m;",
@@ -1999,7 +2022,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
I != E; ++I) {
- llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
+ llvm::dbgs() << I->Tok->Tok.getName() << "["
+ << "T=" << I->Tok->Type
+ << ", OC=" << I->Tok->OriginalColumn << "] ";
}
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
@@ -2024,6 +2049,7 @@ void UnwrappedLineParser::addUnwrappedLine() {
});
CurrentLines->push_back(std::move(*Line));
Line->Tokens.clear();
+ Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
@@ -2039,13 +2065,139 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
FormatTok.NewlinesBefore > 0;
}
+static bool isLineComment(const FormatToken &FormatTok) { // True only for a comment token whose text begins with two slashes.
+ return FormatTok.is(tok::comment) &&
+ FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// section on \p Line. Returns false when \p Line has no tokens or when the comment text matches \p CommentPragmasRegex.
+static bool continuesLineComment(const FormatToken &FormatTok,
+ const UnwrappedLine &Line,
+ llvm::Regex &CommentPragmasRegex) {
+ if (Line.Tokens.empty())
+ return false; // Nothing on Line that could be continued.
+
+ StringRef IndentContent = FormatTok.TokenText;
+ if (FormatTok.TokenText.startswith("//") ||
+ FormatTok.TokenText.startswith("/*"))
+ IndentContent = FormatTok.TokenText.substr(2); // Drop the two-character comment opener before matching.
+ if (CommentPragmasRegex.match(IndentContent))
+ return false; // Comments matching the pragma regex never continue a section.
+
+ // If Line starts with a line comment, then FormatTok continues the comment
+ // section if its original column is greater or equal to the original start
+ // column of the line.
+ //
+ // Define the min column token of a line as follows: if a line ends in '{' or
+ // contains a '{' followed by a line comment, then the min column token is
+ // that '{'. Otherwise, the min column token of the line is the first token of
+ // the line.
+ //
+ // If Line starts with a token other than a line comment, then FormatTok
+ // continues the comment section if its original column is greater than the
+ // original start column of the min column token of the line.
+ //
+ // For example, the second line comment continues the first in these cases:
+ //
+ //   // first line
+ //   // second line
+ //
+ // and:
+ //
+ //   // first line
+ //      // second line
+ //
+ // and:
+ //
+ //   int i; // first line
+ //          // second line
+ //
+ // and:
+ //
+ //   do { // first line
+ //        // second line
+ //     int i;
+ //   } while (true);
+ //
+ // and:
+ //
+ //   enum {
+ //     a, // first line
+ //        // second line
+ //     b
+ //   };
+ //
+ // The second line comment doesn't continue the first in these cases:
+ //
+ //    // first line
+ //   // second line
+ //
+ // and:
+ //
+ //   int i; // first line
+ //   // second line
+ //
+ // and:
+ //
+ //   do { // first line
+ //   // second line
+ //     int i;
+ //   } while (true);
+ //
+ // and:
+ //
+ //   enum {
+ //     a, // first line
+ //   // second line
+ //   };
+ const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
+
+ // Scan for '{//'. If found, use the column of '{' as a min column for line
+ // comment section continuation.
+ const FormatToken *PreviousToken = nullptr;
+ for (const UnwrappedLineNode &Node : Line.Tokens) {
+ if (PreviousToken && PreviousToken->is(tok::l_brace) &&
+ isLineComment(*Node.Tok)) {
+ MinColumnToken = PreviousToken;
+ break;
+ }
+ PreviousToken = Node.Tok;
+
+ // Grab the last newline preceding a token in this unwrapped line, i.e. the most recent token that has NewlinesBefore > 0.
+ if (Node.Tok->NewlinesBefore > 0) {
+ MinColumnToken = Node.Tok;
+ }
+ }
+ if (PreviousToken && PreviousToken->is(tok::l_brace)) { // Line ends in '{' (or the scan stopped at a '{' followed by a line comment).
+ MinColumnToken = PreviousToken;
+ }
+
+ unsigned MinContinueColumn =
+ MinColumnToken->OriginalColumn +
+ (isLineComment(*MinColumnToken) ? 0 : 1); // +0: same column suffices after a line comment; +1: must be strictly to the right otherwise.
+ return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && // A line comment exactly one newline after the previous token ...
+ isLineComment(*(Line.Tokens.back().Tok)) && // ... where Line currently ends in a line comment ...
+ FormatTok.OriginalColumn >= MinContinueColumn; // ... and the column threshold is met.
+}
+
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
bool JustComments = Line->Tokens.empty();
for (SmallVectorImpl<FormatToken *>::const_iterator
I = CommentsBeforeNextToken.begin(),
E = CommentsBeforeNextToken.end();
I != E; ++I) {
- if (isOnNewLine(**I) && JustComments)
+ // Line comments that belong to the same line comment section are put on the
+ // same line since later we might want to reflow content between them.
+ // Additional fine-grained breaking of line comment sections is controlled
+ // by the class BreakableLineCommentSection in case it is desirable to keep
+ // several line comment sections in the same unwrapped line.
+ //
+ // FIXME: Consider putting separate line comment sections as children to the
+ // unwrapped line instead.
+ (*I)->ContinuesLineCommentSection =
+ continuesLineComment(**I, *Line, CommentPragmasRegex);
+ if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
addUnwrappedLine();
pushToken(*I);
}
@@ -2073,13 +2225,71 @@ const FormatToken *UnwrappedLineParser::getPreviousToken() {
return Line->Tokens.back().Tok;
}
+void UnwrappedLineParser::distributeComments(
+ const SmallVectorImpl<FormatToken *> &Comments,
+ const FormatToken *NextTok) {
+ // Whether or not a line comment token continues a line is controlled by
+ // the method continuesLineComment, with the following caveat:
+ //
+ // Define a trail of Comments to be a nonempty proper postfix of Comments such
+ // that each comment line from the trail is aligned with the next token, if
+ // the next token exists. If a trail exists, the beginning of the maximal
+ // trail is marked as a start of a new comment section.
+ //
+ // For example in this code:
+ //
+ //   int a; // line about a
+ //   // line 1 about b
+ //   // line 2 about b
+ //   int b;
+ //
+ // the two lines about b form a maximal trail, so there are two sections, the
+ // first one consisting of the single comment "// line about a" and the
+ // second one consisting of the next two comments.
+ if (Comments.empty())
+ return; // Also guarantees Comments.size() - 1 below cannot wrap around.
+ bool ShouldPushCommentsInCurrentLine = true;
+ bool HasTrailAlignedWithNextToken = false;
+ unsigned StartOfTrailAlignedWithNextToken = 0;
+ if (NextTok) {
+ // We are skipping the first element intentionally (a proper postfix never starts at index 0).
+ for (unsigned i = Comments.size() - 1; i > 0; --i) {
+ if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
+ HasTrailAlignedWithNextToken = true;
+ StartOfTrailAlignedWithNextToken = i; // NOTE(review): the scan does not stop at a misaligned comment, so this ends as the lowest aligned index > 0 - confirm this matches the "maximal trail" definition above.
+ }
+ }
+ }
+ for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
+ FormatToken *FormatTok = Comments[i];
+ if (HasTrailAlignedWithNextToken &&
+ i == StartOfTrailAlignedWithNextToken) {
+ FormatTok->ContinuesLineCommentSection = false; // The start of the trail always opens a new section.
+ } else {
+ FormatTok->ContinuesLineCommentSection =
+ continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
+ }
+ if (!FormatTok->ContinuesLineCommentSection &&
+ (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
+ ShouldPushCommentsInCurrentLine = false; // Never reset to true: this and all later comments are deferred.
+ }
+ if (ShouldPushCommentsInCurrentLine) {
+ pushToken(FormatTok); // Comment goes onto the current line.
+ } else {
+ CommentsBeforeNextToken.push_back(FormatTok); // Comment is queued in CommentsBeforeNextToken instead.
+ }
+ }
+}
+
void UnwrappedLineParser::readToken() {
- bool CommentsInCurrentLine = true;
+ SmallVector<FormatToken *, 1> Comments;
do {
FormatTok = Tokens->getNextToken();
assert(FormatTok);
while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
(FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
// If there is an unfinished unwrapped line, we flush the preprocessor
// directives only after that unwrapped line was finished later.
bool SwitchToPreprocessorLines = !Line->Tokens.empty();
@@ -2109,17 +2319,17 @@ void UnwrappedLineParser::readToken() {
continue;
}
- if (!FormatTok->Tok.is(tok::comment))
+ if (!FormatTok->Tok.is(tok::comment)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
return;
- if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
- CommentsInCurrentLine = false;
- }
- if (CommentsInCurrentLine) {
- pushToken(FormatTok);
- } else {
- CommentsBeforeNextToken.push_back(FormatTok);
}
+
+ Comments.push_back(FormatTok);
} while (!eof());
+
+ distributeComments(Comments, nullptr);
+ Comments.clear();
}
void UnwrappedLineParser::pushToken(FormatToken *Tok) {