diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:02:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:02:28 +0000 |
commit | 7442d6faa2719e4e7d33a7021c406c5a4facd74d (patch) | |
tree | c72b9241553fc9966179aba84f90f17bfa9235c3 /lib/Format/UnwrappedLineParser.cpp | |
parent | b52119637f743680a99710ce5fdb6646da2772af (diff) |
Notes
Diffstat (limited to 'lib/Format/UnwrappedLineParser.cpp')
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 286 |
1 files changed, 248 insertions, 38 deletions
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 8fc3b78aee010..5be68ad5c6b82 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -202,7 +202,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), + CurrentLines(&Lines), Style(Style), Keywords(Keywords), + CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { @@ -334,8 +335,11 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { case tok::l_brace: if (Style.Language == FormatStyle::LK_JavaScript && PrevTok && PrevTok->is(tok::colon)) - // In TypeScript's TypeMemberLists, there can be semicolons between the - // individual members. + // A colon indicates this code is in a type, or a braced list following + // a label in an object literal ({a: {b: 1}}). + // The code below could be confused by semicolons between the individual + // members in a type member list, which would normally trigger BK_Block. + // In both cases, this must be parsed as an inline braced init. Tok->BlockKind = BK_BracedInit; else Tok->BlockKind = BK_Unknown; @@ -424,6 +428,8 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, parseParens(); addUnwrappedLine(); + size_t OpeningLineIndex = + Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); @@ -449,6 +455,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (MunchSemi && FormatTok->Tok.is(tok::semi)) nextToken(); Line->Level = InitialLevel; + Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; } static bool isGoogScope(const UnwrappedLine &Line) { @@ -582,13 +589,14 @@ void UnwrappedLineParser::conditionalCompilationEnd() { } void UnwrappedLineParser::parsePPIf(bool IfDef) { + bool IfNDef = FormatTok->is(tok::pp_ifndef); nextToken(); - bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && - FormatTok->Tok.getLiteralData() != nullptr && - StringRef(FormatTok->Tok.getLiteralData(), - FormatTok->Tok.getLength()) == "0") || - FormatTok->Tok.is(tok::kw_false); - conditionalCompilationStart(!IfDef && IsLiteralFalse); + bool Unreachable = false; + if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) + Unreachable = true; + if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") + Unreachable = true; + conditionalCompilationStart(Unreachable); parsePPUnknown(); } @@ -746,8 +754,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) return addUnwrappedLine(); - if ((PreviousMustBeValue || Previous->is(tok::r_brace)) && - isJSDeclOrStmt(Keywords, Next)) + if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) return addUnwrappedLine(); } @@ -909,7 +916,8 @@ void UnwrappedLineParser::parseStructuralElement() { return; } } - if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, + if (Style.isCpp() && + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); if (FormatTok->is(tok::colon)) { @@ -943,7 +951,7 @@ void UnwrappedLineParser::parseStructuralElement() { if (!parseEnum()) break; // This only applies for C++. - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { addUnwrappedLine(); return; } @@ -1124,7 +1132,7 @@ void UnwrappedLineParser::parseStructuralElement() { } bool UnwrappedLineParser::tryToParseLambda() { - if (Style.Language != FormatStyle::LK_Cpp) { + if (!Style.isCpp()) { nextToken(); return false; } @@ -1298,6 +1306,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { continue; } } + if (FormatTok->is(tok::l_brace)) { + // Could be a method inside of a braced list `{a() { return 1; }}`. + if (tryToParseBracedList()) + continue; + parseChildBlock(); + } } switch (FormatTok->Tok.getKind()) { case tok::caret: @@ -1309,12 +1323,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { case tok::l_square: tryToParseLambda(); break; - case tok::l_brace: - // Assume there are no blocks inside a braced init list apart - // from the ones we explicitly parse out (like lambdas). - FormatTok->BlockKind = BK_BracedInit; - parseBracedList(); - break; case tok::l_paren: parseParens(); // JavaScript can just have free standing methods and getters/setters in @@ -1325,6 +1333,12 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { break; } break; + case tok::l_brace: + // Assume there are no blocks inside a braced init list apart + // from the ones we explicitly parse out (like lambdas). + FormatTok->BlockKind = BK_BracedInit; + parseBracedList(); + break; case tok::r_brace: nextToken(); return !HasError; @@ -1381,6 +1395,12 @@ void UnwrappedLineParser::parseParens() { if (FormatTok->Tok.is(tok::l_brace)) parseBracedList(); break; + case tok::kw_class: + if (Style.Language == FormatStyle::LK_JavaScript) + parseRecord(/*ParseAsExpr=*/true); + else + nextToken(); + break; case tok::identifier: if (Style.Language == FormatStyle::LK_JavaScript && (FormatTok->is(Keywords.kw_function) || @@ -1722,8 +1742,7 @@ bool UnwrappedLineParser::parseEnum() { nextToken(); // If there are two identifiers in a row, this is likely an elaborate // return type. In Java, this can be "implements", etc. - if (Style.Language == FormatStyle::LK_Cpp && - FormatTok->is(tok::identifier)) + if (Style.isCpp() && FormatTok->is(tok::identifier)) return false; } } @@ -1819,7 +1838,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { addUnwrappedLine(); } -void UnwrappedLineParser::parseRecord() { +void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { const FormatToken &InitialToken = *FormatTok; nextToken(); @@ -1863,11 +1882,15 @@ void UnwrappedLineParser::parseRecord() { } } if (FormatTok->Tok.is(tok::l_brace)) { - if (ShouldBreakBeforeBrace(Style, InitialToken)) - addUnwrappedLine(); + if (ParseAsExpr) { + parseChildBlock(); + } else { + if (ShouldBreakBeforeBrace(Style, InitialToken)) + addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, - /*MunchSemi=*/false); + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, + /*MunchSemi=*/false); + } } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "class A {} n, m;", @@ -1999,7 +2022,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { - llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; + llvm::dbgs() << I->Tok->Tok.getName() << "[" + << "T=" << I->Tok->Type + << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2024,6 +2049,7 @@ void UnwrappedLineParser::addUnwrappedLine() { }); CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); + Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), @@ -2039,13 +2065,139 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { FormatTok.NewlinesBefore > 0; } +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && + FormatTok.TokenText.startswith("//"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// section on \p Line. +static bool continuesLineComment(const FormatToken &FormatTok, + const UnwrappedLine &Line, + llvm::Regex &CommentPragmasRegex) { + if (Line.Tokens.empty()) + return false; + + StringRef IndentContent = FormatTok.TokenText; + if (FormatTok.TokenText.startswith("//") || + FormatTok.TokenText.startswith("/*")) + IndentContent = FormatTok.TokenText.substr(2); + if (CommentPragmasRegex.match(IndentContent)) + return false; + + // If Line starts with a line comment, then FormatTok continues the comment + // section if its original column is greater or equal to the original start + // column of the line. + // + // Define the min column token of a line as follows: if a line ends in '{' or + // contains a '{' followed by a line comment, then the min column token is + // that '{'. Otherwise, the min column token of the line is the first token of + // the line. + // + // If Line starts with a token other than a line comment, then FormatTok + // continues the comment section if its original column is greater than the + // original start column of the min column token of the line. + // + // For example, the second line comment continues the first in these cases: + // + // // first line + // // second line + // + // and: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // b + // }; + // + // The second line comment doesn't continue the first in these cases: + // + // // first line + // // second line + // + // and: + // + // int i; // first line + // // second line + // + // and: + // + // do { // first line + // // second line + // int i; + // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // }; + const FormatToken *MinColumnToken = Line.Tokens.front().Tok; + + // Scan for '{//'. If found, use the column of '{' as a min column for line + // comment section continuation. + const FormatToken *PreviousToken = nullptr; + for (const UnwrappedLineNode &Node : Line.Tokens) { + if (PreviousToken && PreviousToken->is(tok::l_brace) && + isLineComment(*Node.Tok)) { + MinColumnToken = PreviousToken; + break; + } + PreviousToken = Node.Tok; + + // Grab the last newline preceding a token in this unwrapped line. + if (Node.Tok->NewlinesBefore > 0) { + MinColumnToken = Node.Tok; + } + } + if (PreviousToken && PreviousToken->is(tok::l_brace)) { + MinColumnToken = PreviousToken; + } + + unsigned MinContinueColumn = + MinColumnToken->OriginalColumn + + (isLineComment(*MinColumnToken) ? 0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*(Line.Tokens.back().Tok)) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (SmallVectorImpl<FormatToken *>::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (isOnNewLine(**I) && JustComments) + // Line comments that belong to the same line comment section are put on the + // same line since later we might want to reflow content between them. + // Additional fine-grained breaking of line comment sections is controlled + // by the class BreakableLineCommentSection in case it is desirable to keep + // several line comment sections in the same unwrapped line. + // + // FIXME: Consider putting separate line comment sections as children to the + // unwrapped line instead. + (*I)->ContinuesLineCommentSection = + continuesLineComment(**I, *Line, CommentPragmasRegex); + if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(*I); } @@ -2073,13 +2225,71 @@ const FormatToken *UnwrappedLineParser::getPreviousToken() { return Line->Tokens.back().Tok; } +void UnwrappedLineParser::distributeComments( + const SmallVectorImpl<FormatToken *> &Comments, + const FormatToken *NextTok) { + // Whether or not a line comment token continues a line is controlled by + // the method continuesLineComment, with the following caveat: + // + // Define a trail of Comments to be a nonempty proper postfix of Comments such + // that each comment line from the trail is aligned with the next token, if + // the next token exists. If a trail exists, the beginning of the maximal + // trail is marked as a start of a new comment section. + // + // For example in this code: + // + // int a; // line about a + // // line 1 about b + // // line 2 about b + // int b; + // + // the two lines about b form a maximal trail, so there are two sections, the + // first one consisting of the single comment "// line about a" and the + // second one consisting of the next two comments. + if (Comments.empty()) + return; + bool ShouldPushCommentsInCurrentLine = true; + bool HasTrailAlignedWithNextToken = false; + unsigned StartOfTrailAlignedWithNextToken = 0; + if (NextTok) { + // We are skipping the first element intentionally. + for (unsigned i = Comments.size() - 1; i > 0; --i) { + if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { + HasTrailAlignedWithNextToken = true; + StartOfTrailAlignedWithNextToken = i; + } + } + } + for (unsigned i = 0, e = Comments.size(); i < e; ++i) { + FormatToken *FormatTok = Comments[i]; + if (HasTrailAlignedWithNextToken && + i == StartOfTrailAlignedWithNextToken) { + FormatTok->ContinuesLineCommentSection = false; + } else { + FormatTok->ContinuesLineCommentSection = + continuesLineComment(*FormatTok, *Line, CommentPragmasRegex); + } + if (!FormatTok->ContinuesLineCommentSection && + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { + ShouldPushCommentsInCurrentLine = false; + } + if (ShouldPushCommentsInCurrentLine) { + pushToken(FormatTok); + } else { + CommentsBeforeNextToken.push_back(FormatTok); + } + } +} + void UnwrappedLineParser::readToken() { - bool CommentsInCurrentLine = true; + SmallVector<FormatToken *, 1> Comments; do { FormatTok = Tokens->getNextToken(); assert(FormatTok); while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { + distributeComments(Comments, FormatTok); + Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = !Line->Tokens.empty(); @@ -2109,17 +2319,17 @@ void UnwrappedLineParser::readToken() { continue; } - if (!FormatTok->Tok.is(tok::comment)) + if (!FormatTok->Tok.is(tok::comment)) { + distributeComments(Comments, FormatTok); + Comments.clear(); return; - if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { - CommentsInCurrentLine = false; - } - if (CommentsInCurrentLine) { - pushToken(FormatTok); - } else { - CommentsBeforeNextToken.push_back(FormatTok); } + + Comments.push_back(FormatTok); } while (!eof()); + + distributeComments(Comments, nullptr); + Comments.clear(); } void UnwrappedLineParser::pushToken(FormatToken *Tok) { |