diff options
Diffstat (limited to 'lib/Format/TokenAnnotator.cpp')
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 197 |
1 files changed, 155 insertions, 42 deletions
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 4a90522e6e31c..cf6373f456573 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -273,8 +273,9 @@ private: !CurrentToken->Next->HasUnescapedNewline && !CurrentToken->Next->isTrailingComment()) HasMultipleParametersOnALine = true; - if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || - CurrentToken->isSimpleTypeSpecifier()) + if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || + CurrentToken->Previous->isSimpleTypeSpecifier()) && + !CurrentToken->is(tok::l_brace)) Contexts.back().IsExpression = false; if (CurrentToken->isOneOf(tok::semi, tok::colon)) MightBeObjCForRangeLoop = false; @@ -305,8 +306,19 @@ private: FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; FormatToken *Parent = Left->getPreviousNonComment(); + + // Cases where '>' is followed by '['. + // In C++, this can happen either in array of templates (foo<int>[10]) + // or when array is a nested template type (unique_ptr<type1<type2>[]>). + bool CppArrayTemplates = + Style.Language == FormatStyle::LK_Cpp && Parent && + Parent->is(TT_TemplateCloser) && + (Contexts.back().CanBeExpression || Contexts.back().IsExpression || + Contexts.back().InTemplateArgument); + bool StartsObjCMethodExpr = - Style.Language == FormatStyle::LK_Cpp && + !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp || + Style.Language == FormatStyle::LK_ObjC) && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && CurrentToken->isNot(tok::l_brace) && (!Parent || @@ -326,7 +338,7 @@ private: Parent->isOneOf(tok::l_brace, tok::comma)) { Left->Type = TT_JsComputedPropertyName; } else if (Style.Language == FormatStyle::LK_Proto || - (Parent && + (!CppArrayTemplates && Parent && Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, tok::comma, tok::l_paren, tok::l_square, tok::question, tok::colon, tok::kw_return, @@ -422,7 +434,8 @@ private: FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && (!Contexts.back().ColonIsDictLiteral || - Style.Language != FormatStyle::LK_Cpp)) || + (Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_ObjC))) || Style.Language == FormatStyle::LK_Proto) && (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal))) @@ -431,6 +444,9 @@ private: Style.Language == FormatStyle::LK_JavaScript) Left->Type = TT_DictLiteral; } + if (CurrentToken->is(tok::comma) && + Style.Language == FormatStyle::LK_JavaScript) + Left->Type = TT_DictLiteral; if (!consumeToken()) return false; } @@ -508,19 +524,29 @@ private: } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; - Tok->Previous->Type = TT_SelectorName; - if (Tok->Previous->ColumnWidth > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; - if (!Contexts.back().FirstObjCSelectorName) - Contexts.back().FirstObjCSelectorName = Tok->Previous; + const FormatToken *BeforePrevious = Tok->Previous->Previous; + if (!BeforePrevious || + !(BeforePrevious->is(TT_CastRParen) || + (BeforePrevious->is(TT_ObjCMethodExpr) && + BeforePrevious->is(tok::colon))) || + BeforePrevious->is(tok::r_square) || + Contexts.back().LongestObjCSelectorName == 0) { + Tok->Previous->Type = TT_SelectorName; + if (Tok->Previous->ColumnWidth > + Contexts.back().LongestObjCSelectorName) + Contexts.back().LongestObjCSelectorName = + Tok->Previous->ColumnWidth; + if (!Contexts.back().FirstObjCSelectorName) + Contexts.back().FirstObjCSelectorName = Tok->Previous; + } } else if (Contexts.back().ColonIsForRangeExpr) { Tok->Type = TT_RangeBasedForLoopColon; } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { Tok->Type = TT_BitFieldColon; } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { - if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept)) + if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, + tok::kw_noexcept)) Tok->Type = TT_CtorInitializerColon; else Tok->Type = TT_InheritanceColon; @@ -858,7 +884,8 @@ private: if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, - TT_RegexLiteral)) + TT_OverloadedOperator, TT_RegexLiteral, + TT_TemplateString)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1037,12 +1064,17 @@ private: !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, tok::period, tok::arrow, tok::coloncolon)) - if (FormatToken *BeforeParen = Current.MatchingParen->Previous) - if (BeforeParen->is(tok::identifier) && - BeforeParen->TokenText == BeforeParen->TokenText.upper() && - (!BeforeParen->Previous || - BeforeParen->Previous->ClosesTemplateDeclaration)) - Current.Type = TT_FunctionAnnotationRParen; + if (FormatToken *AfterParen = Current.MatchingParen->Next) { + // Make sure this isn't the return type of an Obj-C block declaration + if (AfterParen->Tok.isNot(tok::caret)) { + if (FormatToken *BeforeParen = Current.MatchingParen->Previous) + if (BeforeParen->is(tok::identifier) && + BeforeParen->TokenText == BeforeParen->TokenText.upper() && + (!BeforeParen->Previous || + BeforeParen->Previous->ClosesTemplateDeclaration)) + Current.Type = TT_FunctionAnnotationRParen; + } + } } else if (Current.is(tok::at) && Current.Next) { if (Current.Next->isStringLiteral()) { Current.Type = TT_ObjCStringLiteral; @@ -1144,6 +1176,7 @@ private: bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++ and Java. if (Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_ObjC && Style.Language != FormatStyle::LK_Java) return false; @@ -1206,6 +1239,13 @@ private: if (!LeftOfParens) return false; + // Certain token types inside the parentheses mean that this can't be a + // cast. + for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; + Token = Token->Next) + if (Token->is(TT_BinaryOperator)) + return false; + // If the following token is an identifier or 'this', this is a cast. All // cases where this can be something else are handled above. if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) @@ -1243,7 +1283,7 @@ private: const FormatToken *NextToken = Tok.getNextNonComment(); if (!NextToken || - NextToken->isOneOf(tok::arrow, Keywords.kw_final, + NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal, Keywords.kw_override) || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) return TT_PointerOrReference; @@ -1303,7 +1343,13 @@ private: TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken || PrevToken->is(TT_CastRParen)) + if (!PrevToken) + return TT_UnaryOperator; + + if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) && + !PrevToken->is(tok::exclaim)) + // There aren't any trailing unary operators except for TypeScript's + // non-null operator (!). Thus, this must be squence of leading operators. return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -1560,6 +1606,13 @@ void TokenAnnotator::setCommentLineLevels( } } +static unsigned maxNestingDepth(const AnnotatedLine &Line) { + unsigned Result = 0; + for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next) + Result = std::max(Result, Tok->NestingLevel); + return Result; +} + void TokenAnnotator::annotate(AnnotatedLine &Line) { for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), E = Line.Children.end(); @@ -1568,6 +1621,14 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { } AnnotatingParser Parser(Style, Line, Keywords); Line.Type = Parser.parseLine(); + + // With very deep nesting, ExpressionParser uses lots of stack and the + // formatting algorithm is very slow. We're not going to do a good job here + // anyway - it's probably generated code being formatted by mistake. + // Just skip the whole line. + if (maxNestingDepth(Line) > 50) + Line.Type = LT_Invalid; + if (Line.Type == LT_Invalid) return; @@ -1816,10 +1877,12 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 100; if (Left.is(TT_JsTypeColon)) return 35; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return 100; } - if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && - Right.Next->is(TT_DictLiteral))) + if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return 1; if (Right.is(tok::l_square)) { if (Style.Language == FormatStyle::LK_Proto) @@ -1935,20 +1998,24 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_JavaAnnotation)) return 50; + if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && + Left.Previous->isLabelString() && + (Left.NextOperator || Left.OperatorIndex != 0)) + return 45; + if (Right.is(tok::plus) && Left.isLabelString() && + (Right.NextOperator || Right.OperatorIndex != 0)) + return 25; + if (Left.is(tok::comma)) + return 1; + if (Right.is(tok::lessless) && Left.isLabelString() && + (Right.NextOperator || Right.OperatorIndex != 1)) + return 25; if (Right.is(tok::lessless)) { - if (Left.is(tok::string_literal) && - (Right.NextOperator || Right.OperatorIndex != 1)) { - StringRef Content = Left.TokenText; - if (Content.startswith("\"")) - Content = Content.drop_front(1); - if (Content.endswith("\"")) - Content = Content.drop_back(1); - Content = Content.trim(); - if (Content.size() > 1 && - (Content.back() == ':' || Content.back() == '=')) - return 25; - } - return 1; // Breaking at a << is really cheap. + // Breaking at a << is really cheap. + if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) + // Slightly prefer to break before the first one in log-like statements. + return 2; + return 1; } if (Left.is(TT_ConditionalExpr)) return prec::Conditional; @@ -1984,9 +2051,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && - (Left.is(tok::kw_template) || - (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) + Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList) return true; + if (Right.is(tok::less) && Left.is(tok::kw_template)) + return Style.SpaceAfterTemplateKeyword; if (Left.isOneOf(tok::exclaim, tok::tilde)) return false; if (Left.is(tok::at) && @@ -2011,7 +2079,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.Previous->is(tok::r_paren)) || (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (Style.PointerAlignment != FormatStyle::PAS_Left || - Line.IsMultiVariableDeclStmt))); + (Line.IsMultiVariableDeclStmt && + (Left.NestingLevel == 0 || + (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || (Style.PointerAlignment != FormatStyle::PAS_Right && @@ -2113,13 +2183,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return false; + if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) + return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) return false; + if (Right.isOneOf(tok::l_brace, tok::l_square) && + Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) + return true; + // JS methods can use some keywords as names (e.g. `delete()`). + if (Right.is(tok::l_paren) && Line.MustBeDeclaration && + Left.Tok.getIdentifierInfo()) + return false; if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, Keywords.kw_of, tok::kw_const) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; + if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && + Left.Previous->is(tok::period) && Right.is(tok::l_paren)) + return false; + if (Left.is(Keywords.kw_as) && + Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) + return true; if (Left.is(tok::kw_default) && Left.Previous && Left.Previous->is(tok::kw_export)) return true; @@ -2146,6 +2234,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, tok::r_square, tok::r_brace) || Left.Tok.isLiteral())) return false; + if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as)) + return true; // "x! as string" } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; @@ -2369,7 +2459,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Keywords.kw_implements)) return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.is(tok::kw_return)) + const FormatToken *NonComment = Right.getPreviousNonComment(); + if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw) || + (NonComment && + NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw))) return false; // Otherwise a semicolon is inserted. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; @@ -2383,6 +2478,18 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; if (Right.is(Keywords.kw_as)) return false; // must not break before as in 'x as type' casts + if (Left.is(Keywords.kw_declare) && + Right.isOneOf(Keywords.kw_module, tok::kw_namespace, + Keywords.kw_function, tok::kw_class, tok::kw_enum, + Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, + Keywords.kw_let, tok::kw_const)) + // See grammar for 'declare' statements at: + // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 + return false; + if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && + Right.isOneOf(tok::identifier, tok::string_literal)) { + return false; // must not break in "module foo { ...}" + } } if (Left.is(tok::at)) @@ -2415,10 +2522,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return !Style.BreakBeforeTernaryOperators; if (Right.is(TT_InheritanceColon)) return true; + if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && + Left.isNot(TT_SelectorName)) + return true; if (Right.is(tok::colon) && !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) return false; - if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))) + if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) return true; if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_ObjCMethodExpr))) @@ -2434,6 +2544,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; if (Right.is(TT_RangeBasedForLoopColon)) return false; + if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) + return true; if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || Left.is(tok::kw_operator)) return false; @@ -2522,7 +2634,8 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { << " FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; - llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; + llvm::errs() << " FakeRParens=" << Tok->FakeRParens; + llvm::errs() << " Text='" << Tok->TokenText << "'\n"; if (!Tok->Next) assert(Tok == Line.Last); Tok = Tok->Next; |