summaryrefslogtreecommitdiff
path: root/lib/Format/TokenAnnotator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/TokenAnnotator.cpp')
-rw-r--r--lib/Format/TokenAnnotator.cpp197
1 files changed, 155 insertions, 42 deletions
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 4a90522e6e31c..cf6373f456573 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -273,8 +273,9 @@ private:
!CurrentToken->Next->HasUnescapedNewline &&
!CurrentToken->Next->isTrailingComment())
HasMultipleParametersOnALine = true;
- if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
- CurrentToken->isSimpleTypeSpecifier())
+ if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
+ CurrentToken->Previous->isSimpleTypeSpecifier()) &&
+ !CurrentToken->is(tok::l_brace))
Contexts.back().IsExpression = false;
if (CurrentToken->isOneOf(tok::semi, tok::colon))
MightBeObjCForRangeLoop = false;
@@ -305,8 +306,19 @@ private:
FormatToken *Left = CurrentToken->Previous;
Left->ParentBracket = Contexts.back().ContextKind;
FormatToken *Parent = Left->getPreviousNonComment();
+
+ // Cases where '>' is followed by '['.
+ // In C++, this can happen either in array of templates (foo<int>[10])
+ // or when array is a nested template type (unique_ptr<type1<type2>[]>).
+ bool CppArrayTemplates =
+ Style.Language == FormatStyle::LK_Cpp && Parent &&
+ Parent->is(TT_TemplateCloser) &&
+ (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
+ Contexts.back().InTemplateArgument);
+
bool StartsObjCMethodExpr =
- Style.Language == FormatStyle::LK_Cpp &&
+ !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp ||
+ Style.Language == FormatStyle::LK_ObjC) &&
Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
CurrentToken->isNot(tok::l_brace) &&
(!Parent ||
@@ -326,7 +338,7 @@ private:
Parent->isOneOf(tok::l_brace, tok::comma)) {
Left->Type = TT_JsComputedPropertyName;
} else if (Style.Language == FormatStyle::LK_Proto ||
- (Parent &&
+ (!CppArrayTemplates && Parent &&
Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
tok::comma, tok::l_paren, tok::l_square,
tok::question, tok::colon, tok::kw_return,
@@ -422,7 +434,8 @@ private:
FormatToken *Previous = CurrentToken->getPreviousNonComment();
if (((CurrentToken->is(tok::colon) &&
(!Contexts.back().ColonIsDictLiteral ||
- Style.Language != FormatStyle::LK_Cpp)) ||
+ (Style.Language != FormatStyle::LK_Cpp &&
+ Style.Language != FormatStyle::LK_ObjC))) ||
Style.Language == FormatStyle::LK_Proto) &&
(Previous->Tok.getIdentifierInfo() ||
Previous->is(tok::string_literal)))
@@ -431,6 +444,9 @@ private:
Style.Language == FormatStyle::LK_JavaScript)
Left->Type = TT_DictLiteral;
}
+ if (CurrentToken->is(tok::comma) &&
+ Style.Language == FormatStyle::LK_JavaScript)
+ Left->Type = TT_DictLiteral;
if (!consumeToken())
return false;
}
@@ -508,19 +524,29 @@ private:
} else if (Contexts.back().ColonIsObjCMethodExpr ||
Line.startsWith(TT_ObjCMethodSpecifier)) {
Tok->Type = TT_ObjCMethodExpr;
- Tok->Previous->Type = TT_SelectorName;
- if (Tok->Previous->ColumnWidth >
- Contexts.back().LongestObjCSelectorName)
- Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
- if (!Contexts.back().FirstObjCSelectorName)
- Contexts.back().FirstObjCSelectorName = Tok->Previous;
+ const FormatToken *BeforePrevious = Tok->Previous->Previous;
+ if (!BeforePrevious ||
+ !(BeforePrevious->is(TT_CastRParen) ||
+ (BeforePrevious->is(TT_ObjCMethodExpr) &&
+ BeforePrevious->is(tok::colon))) ||
+ BeforePrevious->is(tok::r_square) ||
+ Contexts.back().LongestObjCSelectorName == 0) {
+ Tok->Previous->Type = TT_SelectorName;
+ if (Tok->Previous->ColumnWidth >
+ Contexts.back().LongestObjCSelectorName)
+ Contexts.back().LongestObjCSelectorName =
+ Tok->Previous->ColumnWidth;
+ if (!Contexts.back().FirstObjCSelectorName)
+ Contexts.back().FirstObjCSelectorName = Tok->Previous;
+ }
} else if (Contexts.back().ColonIsForRangeExpr) {
Tok->Type = TT_RangeBasedForLoopColon;
} else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
Tok->Type = TT_BitFieldColon;
} else if (Contexts.size() == 1 &&
!Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
- if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept))
+ if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
+ tok::kw_noexcept))
Tok->Type = TT_CtorInitializerColon;
else
Tok->Type = TT_InheritanceColon;
@@ -858,7 +884,8 @@ private:
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
- TT_RegexLiteral))
+ TT_OverloadedOperator, TT_RegexLiteral,
+ TT_TemplateString))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
@@ -1037,12 +1064,17 @@ private:
!Current.Next->isBinaryOperator() &&
!Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
tok::period, tok::arrow, tok::coloncolon))
- if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
- if (BeforeParen->is(tok::identifier) &&
- BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
- (!BeforeParen->Previous ||
- BeforeParen->Previous->ClosesTemplateDeclaration))
- Current.Type = TT_FunctionAnnotationRParen;
+ if (FormatToken *AfterParen = Current.MatchingParen->Next) {
+ // Make sure this isn't the return type of an Obj-C block declaration
+ if (AfterParen->Tok.isNot(tok::caret)) {
+ if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
+ if (BeforeParen->is(tok::identifier) &&
+ BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
+ (!BeforeParen->Previous ||
+ BeforeParen->Previous->ClosesTemplateDeclaration))
+ Current.Type = TT_FunctionAnnotationRParen;
+ }
+ }
} else if (Current.is(tok::at) && Current.Next) {
if (Current.Next->isStringLiteral()) {
Current.Type = TT_ObjCStringLiteral;
@@ -1144,6 +1176,7 @@ private:
bool rParenEndsCast(const FormatToken &Tok) {
// C-style casts are only used in C++ and Java.
if (Style.Language != FormatStyle::LK_Cpp &&
+ Style.Language != FormatStyle::LK_ObjC &&
Style.Language != FormatStyle::LK_Java)
return false;
@@ -1206,6 +1239,13 @@ private:
if (!LeftOfParens)
return false;
+ // Certain token types inside the parentheses mean that this can't be a
+ // cast.
+ for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
+ Token = Token->Next)
+ if (Token->is(TT_BinaryOperator))
+ return false;
+
// If the following token is an identifier or 'this', this is a cast. All
// cases where this can be something else are handled above.
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
@@ -1243,7 +1283,7 @@ private:
const FormatToken *NextToken = Tok.getNextNonComment();
if (!NextToken ||
- NextToken->isOneOf(tok::arrow, Keywords.kw_final,
+ NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal,
Keywords.kw_override) ||
(NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
return TT_PointerOrReference;
@@ -1303,7 +1343,13 @@ private:
TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
const FormatToken *PrevToken = Tok.getPreviousNonComment();
- if (!PrevToken || PrevToken->is(TT_CastRParen))
+ if (!PrevToken)
+ return TT_UnaryOperator;
+
+ if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
+ !PrevToken->is(tok::exclaim))
+ // There aren't any trailing unary operators except for TypeScript's
+ // non-null operator (!). Thus, this must be squence of leading operators.
return TT_UnaryOperator;
// Use heuristics to recognize unary operators.
@@ -1560,6 +1606,13 @@ void TokenAnnotator::setCommentLineLevels(
}
}
+static unsigned maxNestingDepth(const AnnotatedLine &Line) {
+ unsigned Result = 0;
+ for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
+ Result = std::max(Result, Tok->NestingLevel);
+ return Result;
+}
+
void TokenAnnotator::annotate(AnnotatedLine &Line) {
for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
E = Line.Children.end();
@@ -1568,6 +1621,14 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
}
AnnotatingParser Parser(Style, Line, Keywords);
Line.Type = Parser.parseLine();
+
+ // With very deep nesting, ExpressionParser uses lots of stack and the
+ // formatting algorithm is very slow. We're not going to do a good job here
+ // anyway - it's probably generated code being formatted by mistake.
+ // Just skip the whole line.
+ if (maxNestingDepth(Line) > 50)
+ Line.Type = LT_Invalid;
+
if (Line.Type == LT_Invalid)
return;
@@ -1816,10 +1877,12 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
return 100;
if (Left.is(TT_JsTypeColon))
return 35;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return 100;
}
- if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
- Right.Next->is(TT_DictLiteral)))
+ if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
return 1;
if (Right.is(tok::l_square)) {
if (Style.Language == FormatStyle::LK_Proto)
@@ -1935,20 +1998,24 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(TT_JavaAnnotation))
return 50;
+ if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
+ Left.Previous->isLabelString() &&
+ (Left.NextOperator || Left.OperatorIndex != 0))
+ return 45;
+ if (Right.is(tok::plus) && Left.isLabelString() &&
+ (Right.NextOperator || Right.OperatorIndex != 0))
+ return 25;
+ if (Left.is(tok::comma))
+ return 1;
+ if (Right.is(tok::lessless) && Left.isLabelString() &&
+ (Right.NextOperator || Right.OperatorIndex != 1))
+ return 25;
if (Right.is(tok::lessless)) {
- if (Left.is(tok::string_literal) &&
- (Right.NextOperator || Right.OperatorIndex != 1)) {
- StringRef Content = Left.TokenText;
- if (Content.startswith("\""))
- Content = Content.drop_front(1);
- if (Content.endswith("\""))
- Content = Content.drop_back(1);
- Content = Content.trim();
- if (Content.size() > 1 &&
- (Content.back() == ':' || Content.back() == '='))
- return 25;
- }
- return 1; // Breaking at a << is really cheap.
+ // Breaking at a << is really cheap.
+ if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
+ // Slightly prefer to break before the first one in log-like statements.
+ return 2;
+ return 1;
}
if (Left.is(TT_ConditionalExpr))
return prec::Conditional;
@@ -1984,9 +2051,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Right.isOneOf(tok::semi, tok::comma))
return false;
if (Right.is(tok::less) &&
- (Left.is(tok::kw_template) ||
- (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
+ Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)
return true;
+ if (Right.is(tok::less) && Left.is(tok::kw_template))
+ return Style.SpaceAfterTemplateKeyword;
if (Left.isOneOf(tok::exclaim, tok::tilde))
return false;
if (Left.is(tok::at) &&
@@ -2011,7 +2079,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
Left.Previous->is(tok::r_paren)) ||
(!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
(Style.PointerAlignment != FormatStyle::PAS_Left ||
- Line.IsMultiVariableDeclStmt)));
+ (Line.IsMultiVariableDeclStmt &&
+ (Left.NestingLevel == 0 ||
+ (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
(!Left.is(TT_PointerOrReference) ||
(Style.PointerAlignment != FormatStyle::PAS_Right &&
@@ -2113,13 +2183,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return false;
+ if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+ return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
return false;
+ if (Right.isOneOf(tok::l_brace, tok::l_square) &&
+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
+ return true;
+ // JS methods can use some keywords as names (e.g. `delete()`).
+ if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
+ Left.Tok.getIdentifierInfo())
+ return false;
if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
Keywords.kw_of, tok::kw_const) &&
(!Left.Previous || !Left.Previous->is(tok::period)))
return true;
+ if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
+ Left.Previous->is(tok::period) && Right.is(tok::l_paren))
+ return false;
+ if (Left.is(Keywords.kw_as) &&
+ Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
+ return true;
if (Left.is(tok::kw_default) && Left.Previous &&
Left.Previous->is(tok::kw_export))
return true;
@@ -2146,6 +2234,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
tok::r_square, tok::r_brace) ||
Left.Tok.isLiteral()))
return false;
+ if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as))
+ return true; // "x! as string"
} else if (Style.Language == FormatStyle::LK_Java) {
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
return true;
@@ -2369,7 +2459,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
Keywords.kw_implements))
return true;
} else if (Style.Language == FormatStyle::LK_JavaScript) {
- if (Left.is(tok::kw_return))
+ const FormatToken *NonComment = Right.getPreviousNonComment();
+ if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
+ tok::kw_throw) ||
+ (NonComment &&
+ NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,
+ tok::kw_throw)))
return false; // Otherwise a semicolon is inserted.
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
return false;
@@ -2383,6 +2478,18 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
if (Right.is(Keywords.kw_as))
return false; // must not break before as in 'x as type' casts
+ if (Left.is(Keywords.kw_declare) &&
+ Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
+ Keywords.kw_function, tok::kw_class, tok::kw_enum,
+ Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
+ Keywords.kw_let, tok::kw_const))
+ // See grammar for 'declare' statements at:
+ // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
+ return false;
+ if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
+ Right.isOneOf(tok::identifier, tok::string_literal)) {
+ return false; // must not break in "module foo { ...}"
+ }
}
if (Left.is(tok::at))
@@ -2415,10 +2522,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return !Style.BreakBeforeTernaryOperators;
if (Right.is(TT_InheritanceColon))
return true;
+ if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
+ Left.isNot(TT_SelectorName))
+ return true;
if (Right.is(tok::colon) &&
!Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
return false;
- if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
+ if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
return true;
if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
Right.Next->is(TT_ObjCMethodExpr)))
@@ -2434,6 +2544,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return true;
if (Right.is(TT_RangeBasedForLoopColon))
return false;
+ if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
+ return true;
if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
Left.is(tok::kw_operator))
return false;
@@ -2522,7 +2634,8 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
<< " FakeLParens=";
for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
llvm::errs() << Tok->FakeLParens[i] << "/";
- llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
+ llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
+ llvm::errs() << " Text='" << Tok->TokenText << "'\n";
if (!Tok->Next)
assert(Tok == Line.Last);
Tok = Tok->Next;