diff options
Diffstat (limited to 'clang/lib/Format')
-rw-r--r-- | clang/lib/Format/BreakableToken.cpp | 9 | ||||
-rw-r--r-- | clang/lib/Format/ContinuationIndenter.cpp | 174 | ||||
-rw-r--r-- | clang/lib/Format/ContinuationIndenter.h | 44 | ||||
-rw-r--r-- | clang/lib/Format/Format.cpp | 271 | ||||
-rw-r--r-- | clang/lib/Format/FormatToken.cpp | 5 | ||||
-rw-r--r-- | clang/lib/Format/FormatToken.h | 104 | ||||
-rw-r--r-- | clang/lib/Format/FormatTokenLexer.cpp | 291 | ||||
-rw-r--r-- | clang/lib/Format/FormatTokenLexer.h | 20 | ||||
-rw-r--r-- | clang/lib/Format/NamespaceEndCommentsFixer.cpp | 37 | ||||
-rw-r--r-- | clang/lib/Format/SortJavaScriptImports.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Format/TokenAnalyzer.cpp | 17 | ||||
-rw-r--r-- | clang/lib/Format/TokenAnnotator.cpp | 697 | ||||
-rw-r--r-- | clang/lib/Format/UnwrappedLineFormatter.cpp | 38 | ||||
-rw-r--r-- | clang/lib/Format/UnwrappedLineParser.cpp | 382 | ||||
-rw-r--r-- | clang/lib/Format/UnwrappedLineParser.h | 9 | ||||
-rw-r--r-- | clang/lib/Format/WhitespaceManager.cpp | 187 | ||||
-rw-r--r-- | clang/lib/Format/WhitespaceManager.h | 30 |
17 files changed, 1898 insertions, 419 deletions
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index cd0eb0b4324a..15fbe3b6515d 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -587,9 +587,8 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; assert(LocalIndentAtLineBreak >= Prefix.size()); - std::string PrefixWithTrailingIndent = Prefix; - for (unsigned I = 0; I < ContentIndent; ++I) - PrefixWithTrailingIndent += " "; + std::string PrefixWithTrailingIndent = std::string(Prefix); + PrefixWithTrailingIndent.append(ContentIndent, ' '); Whitespaces.replaceWhitespaceInToken( tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, @@ -864,7 +863,8 @@ void BreakableLineCommentSection::reflow(unsigned LineIndex, // tokens by the empty string. Whitespaces.replaceWhitespace( *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, - /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true, + /*InPPDirective=*/false); } else if (LineIndex > 0) { // In case we're reflowing after the '\' in: // @@ -932,6 +932,7 @@ void BreakableLineCommentSection::adaptStartOfLine( /*Newlines=*/1, /*Spaces=*/LineColumn, /*StartOfTokenColumn=*/LineColumn, + /*IsAligned=*/true, /*InPPDirective=*/false); } if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 2ff6e5ec2344..b1497651a8fe 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -329,6 +329,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) { bool ContinuationIndenter::mustBreak(const LineState &State) { const FormatToken &Current = *State.NextToken; const FormatToken &Previous = 
*Current.Previous; + if (Style.BraceWrapping.BeforeLambdaBody && Current.CanBreakBefore && + Current.is(TT_LambdaLBrace) && Previous.isNot(TT_LineComment)) { + auto LambdaBodyLength = getLengthToMatchingParen(Current, State.Stack); + return (LambdaBodyLength > getColumnLimit(State)); + } if (Current.MustBreakBefore || Current.is(TT_InlineASMColon)) return true; if (State.Stack.back().BreakBeforeClosingBrace && @@ -337,10 +342,16 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; if (Style.Language == FormatStyle::LK_ObjC && + Style.ObjCBreakBeforeNestedBlockParam && Current.ObjCSelectorNameParts > 1 && Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) { return true; } + // Avoid producing inconsistent states by requiring breaks where they are not + // permitted for C# generic type constraints. + if (State.Stack.back().IsCSharpGenericTypeConstraint && + Previous.isNot(TT_CSharpGenericTypeConstraintComma)) + return false; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && Style.isCpp() && @@ -356,6 +367,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() && !Current.isOneOf(tok::r_paren, tok::r_brace)) return true; + if (State.Stack.back().IsChainedConditional && + ((Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && + Current.is(tok::colon)) || + (!Style.BreakBeforeTernaryOperators && Previous.is(TT_ConditionalExpr) && + Previous.is(tok::colon)))) + return true; if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || (Previous.is(TT_ArrayInitializerLSquare) && Previous.ParameterCount > 1) || @@ -412,7 +429,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { State.Stack.back().BreakBeforeParameter && Current.CanBreakBefore) return true; 
- if (State.Column <= NewLineColumn) + if (!State.Line->First->is(tok::kw_enum) && State.Column <= NewLineColumn) return false; if (Style.AlwaysBreakBeforeMultilineStrings && @@ -629,9 +646,12 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().NoLineBreak = true; if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && + !State.Stack.back().IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && - (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit)) + (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit)) { State.Stack.back().Indent = State.Column + Spaces; + State.Stack.back().IsAligned = true; + } if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) State.Stack.back().NoLineBreak = true; if (startsSegmentOfBuilderTypeCall(Current) && @@ -673,7 +693,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // does not help. bool HasTwoOperands = P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr); - if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) || + if ((!BreakBeforeOperator && + !(HasTwoOperands && + Style.AlignOperands != FormatStyle::OAS_DontAlign)) || (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) State.Stack.back().NoLineBreakInOperand = true; } @@ -710,6 +732,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, } else if (Previous.is(TT_InheritanceColon)) { State.Stack.back().Indent = State.Column; State.Stack.back().LastSpace = State.Column; + } else if (Current.is(TT_CSharpGenericTypeConstraintColon)) { + State.Stack.back().ColonPos = State.Column; } else if (Previous.opensScope()) { // If a function has a trailing call, indent all parameters from the // opening parenthesis. 
This avoids confusing indents like: @@ -844,6 +868,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, bool ContinuePPDirective = State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, + State.Stack.back().IsAligned, ContinuePPDirective); } @@ -861,8 +886,10 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // Any break on this level means that the parent level has been broken // and we need to avoid bin packing there. bool NestedBlockSpecialCase = - !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].NestedBlockInlined; + (!Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 && + State.Stack[State.Stack.size() - 2].NestedBlockInlined) || + (Style.Language == FormatStyle::LK_ObjC && Current.is(tok::r_brace) && + State.Stack.size() > 1 && !Style.ObjCBreakBeforeNestedBlockParam); if (!NestedBlockSpecialCase) for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) State.Stack[i].BreakBeforeParameter = true; @@ -917,7 +944,13 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (!State.NextToken || !State.NextToken->Previous) return 0; + FormatToken &Current = *State.NextToken; + + if (State.Stack.back().IsCSharpGenericTypeConstraint && + Current.isNot(TT_CSharpGenericTypeConstraint)) + return State.Stack.back().ColonPos + 2; + const FormatToken &Previous = *Current.Previous; // If we are continuing an expression, we want to use the continuation indent. 
unsigned ContinuationIndent = @@ -997,8 +1030,28 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (State.Stack.back().QuestionColumn != 0 && ((NextNonComment->is(tok::colon) && NextNonComment->is(TT_ConditionalExpr)) || - Previous.is(TT_ConditionalExpr))) + Previous.is(TT_ConditionalExpr))) { + if (((NextNonComment->is(tok::colon) && NextNonComment->Next && + !NextNonComment->Next->FakeLParens.empty() && + NextNonComment->Next->FakeLParens.back() == prec::Conditional) || + (Previous.is(tok::colon) && !Current.FakeLParens.empty() && + Current.FakeLParens.back() == prec::Conditional)) && + !State.Stack.back().IsWrappedConditional) { + // NOTE: we may tweak this slightly: + // * not remove the 'lead' ContinuationIndentWidth + // * always un-indent by the operator when + // BreakBeforeTernaryOperators=true + unsigned Indent = State.Stack.back().Indent; + if (Style.AlignOperands != FormatStyle::OAS_DontAlign) { + Indent -= Style.ContinuationIndentWidth; + } + if (Style.BreakBeforeTernaryOperators && + State.Stack.back().UnindentOperator) + Indent -= 2; + return Indent; + } return State.Stack.back().QuestionColumn; + } if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) return State.Stack.back().VariablePos; if ((PreviousNonComment && @@ -1040,6 +1093,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(TT_ArraySubscriptLSquare)) { if (State.Stack.back().StartOfArraySubscripts != 0) return State.Stack.back().StartOfArraySubscripts; + else if (Style.isCSharp()) // C# allows `["key"] = value` inside object + // initializers. 
+ return State.Stack.back().Indent; return ContinuationIndent; } @@ -1071,6 +1127,13 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return ContinuationIndent; if (Current.is(TT_ProtoExtensionLSquare)) return State.Stack.back().Indent; + if (Current.isBinaryOperator() && State.Stack.back().UnindentOperator) + return State.Stack.back().Indent - Current.Tok.getLength() - + Current.SpacesRequiredBefore; + if (Current.isOneOf(tok::comment, TT_BlockComment, TT_LineComment) && + NextNonComment->isBinaryOperator() && State.Stack.back().UnindentOperator) + return State.Stack.back().Indent - NextNonComment->Tok.getLength() - + NextNonComment->SpacesRequiredBefore; if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment && !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) // Ensure that we fall back to the continuation indent width instead of @@ -1079,14 +1142,28 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack.back().Indent; } +static bool hasNestedBlockInlined(const FormatToken *Previous, + const FormatToken &Current, + const FormatStyle &Style) { + if (Previous->isNot(tok::l_paren)) + return true; + if (Previous->ParameterCount > 1) + return true; + + // Also a nested block if contains a lambda inside function with 1 parameter + return (Style.BraceWrapping.BeforeLambdaBody && Current.is(TT_LambdaLSquare)); +} + unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, bool DryRun, bool Newline) { assert(State.Stack.size()); const FormatToken &Current = *State.NextToken; + if (Current.is(TT_CSharpGenericTypeConstraint)) + State.Stack.back().IsCSharpGenericTypeConstraint = true; if (Current.isOneOf(tok::comma, TT_BinaryOperator)) State.Stack.back().NoLineBreakInOperand = false; - if (Current.is(TT_InheritanceColon)) + if (Current.isOneOf(TT_InheritanceColon, TT_CSharpGenericTypeConstraintColon)) State.Stack.back().AvoidBinPacking = true; if 
(Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) { if (State.Stack.back().FirstLessLess == 0) @@ -1102,6 +1179,11 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.is(TT_ArraySubscriptLSquare) && State.Stack.back().StartOfArraySubscripts == 0) State.Stack.back().StartOfArraySubscripts = State.Column; + if (Current.is(TT_ConditionalExpr) && Current.is(tok::question) && + ((Current.MustBreakBefore) || + (Current.getNextNonComment() && + Current.getNextNonComment()->MustBreakBefore))) + State.Stack.back().IsWrappedConditional = true; if (Style.BreakBeforeTernaryOperators && Current.is(tok::question)) State.Stack.back().QuestionColumn = State.Column; if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) { @@ -1181,8 +1263,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { State.Stack.back().NestedBlockInlined = - !Newline && - (Previous->isNot(tok::l_paren) || Previous->ParameterCount > 1); + !Newline && hasNestedBlockInlined(Previous, Current, Style); } moveStatePastFakeLParens(State, Newline); @@ -1233,7 +1314,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, (Previous && (Previous->opensScope() || Previous->isOneOf(tok::semi, tok::kw_return) || (Previous->getPrecedence() == prec::Assignment && - Style.AlignOperands) || + Style.AlignOperands != FormatStyle::OAS_DontAlign) || Previous->is(TT_ObjCMethodExpr))); for (SmallVectorImpl<prec::Level>::const_reverse_iterator I = Current.FakeLParens.rbegin(), @@ -1243,6 +1324,9 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, NewParenState.Tok = nullptr; NewParenState.ContainsLineBreak = false; NewParenState.LastOperatorWrapped = true; + NewParenState.IsChainedConditional = false; + NewParenState.IsWrappedConditional = false; + NewParenState.UnindentOperator = false; 
NewParenState.NoLineBreak = NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand; @@ -1254,14 +1338,27 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // a builder type call after 'return' or, if the alignment after opening // brackets is disabled. if (!Current.isTrailingComment() && - (Style.AlignOperands || *I < prec::Assignment) && + (Style.AlignOperands != FormatStyle::OAS_DontAlign || + *I < prec::Assignment) && (!Previous || Previous->isNot(tok::kw_return) || (Style.Language != FormatStyle::LK_Java && *I > 0)) && (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || - *I != prec::Comma || Current.NestingLevel == 0)) + *I != prec::Comma || Current.NestingLevel == 0)) { NewParenState.Indent = std::max(std::max(State.Column, NewParenState.Indent), State.Stack.back().LastSpace); + } + + // If BreakBeforeBinaryOperators is set, un-indent a bit to account for + // the operator and keep the operands aligned + if (Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator && + Previous && + (Previous->getPrecedence() == prec::Assignment || + Previous->is(tok::kw_return) || + (*I == prec::Conditional && Previous->is(tok::question) && + Previous->is(TT_ConditionalExpr))) && + !Newline) + NewParenState.UnindentOperator = true; // Do not indent relative to the fake parentheses inserted for "." or "->". // This is a special case to make the following to statements consistent: @@ -1275,14 +1372,21 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) NewParenState.StartOfFunctionCall = State.Column; - // Always indent conditional expressions. Never indent expression where - // the 'operator' is ',', ';' or an assignment (i.e. *I <= - // prec::Assignment) as those have different indentation rules. Indent - // other expression, unless the indentation needs to be skipped. 
- if (*I == prec::Conditional || - (!SkipFirstExtraIndent && *I > prec::Assignment && - !Current.isTrailingComment())) + // Indent conditional expressions, unless they are chained "else-if" + // conditionals. Never indent expression where the 'operator' is ',', ';' or + // an assignment (i.e. *I <= prec::Assignment) as those have different + // indentation rules. Indent other expression, unless the indentation needs + // to be skipped. + if (*I == prec::Conditional && Previous && Previous->is(tok::colon) && + Previous->is(TT_ConditionalExpr) && I == Current.FakeLParens.rbegin() && + !State.Stack.back().IsWrappedConditional) { + NewParenState.IsChainedConditional = true; + NewParenState.UnindentOperator = State.Stack.back().UnindentOperator; + } else if (*I == prec::Conditional || + (!SkipFirstExtraIndent && *I > prec::Assignment && + !Current.isTrailingComment())) { NewParenState.Indent += Style.ContinuationIndentWidth; + } if ((Previous && !Previous->opensScope()) || *I != prec::Comma) NewParenState.BreakBeforeParameter = false; State.Stack.push_back(NewParenState); @@ -1308,6 +1412,11 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, if (!Current.opensScope()) return; + // Don't allow '<' or '(' in C# generic type constraints to start new scopes. 
+ if (Current.isOneOf(tok::less, tok::l_paren) && + State.Stack.back().IsCSharpGenericTypeConstraint) + return; + if (Current.MatchingParen && Current.BlockKind == BK_Block) { moveStateToNewBlock(State); return; @@ -1372,6 +1481,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList); AvoidBinPacking = + (State.Stack.back().IsCSharpGenericTypeConstraint) || (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) || (State.Line->MustBeDeclaration && !BinPackDeclaration) || (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || @@ -1380,7 +1490,8 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, (!BinPackInconclusiveFunctions && Current.PackingKind == PPK_Inconclusive))); - if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) { + if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen && + Style.ObjCBreakBeforeNestedBlockParam) { if (Style.ColumnLimit) { // If this '[' opens an ObjC call, determine whether all parameters fit // into one line and put one per line if they don't. 
@@ -1418,7 +1529,22 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; - State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; + State.Stack.back().HasMultipleNestedBlocks = + (Current.BlockParameterCount > 1); + + if (Style.BraceWrapping.BeforeLambdaBody && Current.Next != nullptr && + Current.Tok.is(tok::l_paren)) { + // Search for any parameter that is a lambda + FormatToken const *next = Current.Next; + while (next != nullptr) { + if (next->is(TT_LambdaLSquare)) { + State.Stack.back().HasMultipleNestedBlocks = true; + break; + } + next = next->Next; + } + } + State.Stack.back().IsInsideObjCArrayLiteral = Current.is(TT_ArrayInitializerLSquare) && Current.Previous && Current.Previous->is(tok::at); @@ -1513,8 +1639,8 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( unsigned OldSuffixSize = 2 + OldDelimiter.size(); // We create a virtual text environment which expects a null-terminated // string, so we cannot use StringRef. - std::string RawText = - Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize); + std::string RawText = std::string( + Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize)); if (NewDelimiter != OldDelimiter) { // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the // raw string. @@ -1760,7 +1886,7 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for C#,Java and + // FIXME: String literal breaking is currently disabled for C#, Java and // JavaScript, as it requires strings to be merged using "+" which we // don't support. 
if (Style.Language == FormatStyle::LK_Java || diff --git a/clang/lib/Format/ContinuationIndenter.h b/clang/lib/Format/ContinuationIndenter.h index 11df619e0f40..b1b2611263a9 100644 --- a/clang/lib/Format/ContinuationIndenter.h +++ b/clang/lib/Format/ContinuationIndenter.h @@ -202,13 +202,16 @@ struct ParenState { ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak) : Tok(Tok), Indent(Indent), LastSpace(LastSpace), - NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), - AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), - LastOperatorWrapped(true), ContainsLineBreak(false), - ContainsUnwrappedBuilder(false), AlignColons(true), - ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), - NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} + NestedBlockIndent(Indent), IsAligned(false), + BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking), + BreakBeforeParameter(false), NoLineBreak(NoLineBreak), + NoLineBreakInOperand(false), LastOperatorWrapped(true), + ContainsLineBreak(false), ContainsUnwrappedBuilder(false), + AlignColons(true), ObjCSelectorNameFound(false), + HasMultipleNestedBlocks(false), NestedBlockInlined(false), + IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false), + IsChainedConditional(false), IsWrappedConditional(false), + UnindentOperator(false) {} /// \brief The token opening this parenthesis level, or nullptr if this level /// is opened by fake parenthesis. @@ -264,6 +267,9 @@ struct ParenState { /// Used to align further variables if necessary. unsigned VariablePos = 0; + /// Whether this block's indentation is used for alignment. + bool IsAligned : 1; + /// Whether a newline needs to be inserted before the block's closing /// brace. /// @@ -329,6 +335,20 @@ struct ParenState { /// array literal. 
bool IsInsideObjCArrayLiteral : 1; + bool IsCSharpGenericTypeConstraint : 1; + + /// \brief true if the current \c ParenState represents the false branch of + /// a chained conditional expression (e.g. else-if) + bool IsChainedConditional : 1; + + /// \brief true if the conditional was wrapped on the first operator (the + /// question mark) + bool IsWrappedConditional : 1; + + /// \brief Indicates the indent should be reduced by the length of the + /// operator. + bool UnindentOperator : 1; + bool operator<(const ParenState &Other) const { if (Indent != Other.Indent) return Indent < Other.Indent; @@ -338,6 +358,8 @@ struct ParenState { return NestedBlockIndent < Other.NestedBlockIndent; if (FirstLessLess != Other.FirstLessLess) return FirstLessLess < Other.FirstLessLess; + if (IsAligned != Other.IsAligned) + return IsAligned; if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) return BreakBeforeClosingBrace; if (QuestionColumn != Other.QuestionColumn) @@ -366,6 +388,14 @@ struct ParenState { return ContainsUnwrappedBuilder; if (NestedBlockInlined != Other.NestedBlockInlined) return NestedBlockInlined; + if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) + return IsCSharpGenericTypeConstraint; + if (IsChainedConditional != Other.IsChainedConditional) + return IsChainedConditional; + if (IsWrappedConditional != Other.IsWrappedConditional) + return IsWrappedConditional; + if (UnindentOperator != Other.UnindentOperator) + return UnindentOperator; return false; } }; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index f12bca48c630..0d277a6464af 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -14,6 +14,7 @@ #include "clang/Format/Format.h" #include "AffectedRangeManager.h" +#include "BreakableToken.h" #include "ContinuationIndenter.h" #include "FormatInternal.h" #include "FormatTokenLexer.h" @@ -93,6 +94,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { 
IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); IO.enumCase(Value, "ForContinuationAndIndentation", FormatStyle::UT_ForContinuationAndIndentation); + IO.enumCase(Value, "AlignWithSpaces", FormatStyle::UT_AlignWithSpaces); } }; @@ -157,6 +159,13 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BinPackStyle> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::TrailingCommaStyle> { + static void enumeration(IO &IO, FormatStyle::TrailingCommaStyle &Value) { + IO.enumCase(Value, "None", FormatStyle::TCS_None); + IO.enumCase(Value, "Wrapped", FormatStyle::TCS_Wrapped); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { IO.enumCase(Value, "All", FormatStyle::BOS_All); @@ -187,11 +196,13 @@ struct ScalarEnumerationTraits< static void enumeration(IO &IO, FormatStyle::BraceWrappingAfterControlStatementStyle &Value) { - IO.enumCase(Value, "false", FormatStyle::BWACS_Never); - IO.enumCase(Value, "true", FormatStyle::BWACS_Always); IO.enumCase(Value, "Never", FormatStyle::BWACS_Never); IO.enumCase(Value, "MultiLine", FormatStyle::BWACS_MultiLine); IO.enumCase(Value, "Always", FormatStyle::BWACS_Always); + + // For backward compatibility. 
+ IO.enumCase(Value, "false", FormatStyle::BWACS_Never); + IO.enumCase(Value, "true", FormatStyle::BWACS_Always); } }; @@ -225,6 +236,17 @@ struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> { }; template <> +struct ScalarEnumerationTraits<FormatStyle::IndentExternBlockStyle> { + static void enumeration(IO &IO, FormatStyle::IndentExternBlockStyle &Value) { + IO.enumCase(Value, "AfterExternBlock", FormatStyle::IEBS_AfterExternBlock); + IO.enumCase(Value, "Indent", FormatStyle::IEBS_Indent); + IO.enumCase(Value, "NoIndent", FormatStyle::IEBS_NoIndent); + IO.enumCase(Value, "true", FormatStyle::IEBS_Indent); + IO.enumCase(Value, "false", FormatStyle::IEBS_NoIndent); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { IO.enumCase(Value, "None", FormatStyle::RTBS_None); @@ -300,6 +322,19 @@ struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::OperandAlignmentStyle> { + static void enumeration(IO &IO, FormatStyle::OperandAlignmentStyle &Value) { + IO.enumCase(Value, "DontAlign", FormatStyle::OAS_DontAlign); + IO.enumCase(Value, "Align", FormatStyle::OAS_Align); + IO.enumCase(Value, "AlignAfterOperator", + FormatStyle::OAS_AlignAfterOperator); + + // For backward compatibility. 
+ IO.enumCase(Value, "true", FormatStyle::OAS_Align); + IO.enumCase(Value, "false", FormatStyle::OAS_DontAlign); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); @@ -319,6 +354,8 @@ struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); IO.enumCase(Value, "ControlStatements", FormatStyle::SBPO_ControlStatements); + IO.enumCase(Value, "ControlStatementsExceptForEachMacros", + FormatStyle::SBPO_ControlStatementsExceptForEachMacros); IO.enumCase(Value, "NonEmptyParentheses", FormatStyle::SBPO_NonEmptyParentheses); IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); @@ -378,6 +415,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("AlignConsecutiveMacros", Style.AlignConsecutiveMacros); IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); + IO.mapOptional("AlignConsecutiveBitFields", + Style.AlignConsecutiveBitFields); IO.mapOptional("AlignConsecutiveDeclarations", Style.AlignConsecutiveDeclarations); IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines); @@ -389,6 +428,8 @@ template <> struct MappingTraits<FormatStyle> { Style.AllowAllConstructorInitializersOnNextLine); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); + IO.mapOptional("AllowShortEnumsOnASingleLine", + Style.AllowShortEnumsOnASingleLine); IO.mapOptional("AllowShortBlocksOnASingleLine", Style.AllowShortBlocksOnASingleLine); IO.mapOptional("AllowShortCaseLabelsOnASingleLine", @@ -480,11 +521,14 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("IncludeIsMainSourceRegex", Style.IncludeStyle.IncludeIsMainSourceRegex); IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); + IO.mapOptional("IndentCaseBlocks", 
Style.IndentCaseBlocks); IO.mapOptional("IndentGotoLabels", Style.IndentGotoLabels); IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives); + IO.mapOptional("IndentExternBlock", Style.IndentExternBlock); IO.mapOptional("IndentWidth", Style.IndentWidth); IO.mapOptional("IndentWrappedFunctionNames", Style.IndentWrappedFunctionNames); + IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas); IO.mapOptional("JavaImportGroups", Style.JavaImportGroups); IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes); IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports); @@ -497,6 +541,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("NamespaceMacros", Style.NamespaceMacros); IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList); IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); + IO.mapOptional("ObjCBreakBeforeNestedBlockParam", + Style.ObjCBreakBeforeNestedBlockParam); IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", Style.ObjCSpaceBeforeProtocolList); @@ -553,6 +599,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("TypenameMacros", Style.TypenameMacros); IO.mapOptional("UseCRLF", Style.UseCRLF); IO.mapOptional("UseTab", Style.UseTab); + IO.mapOptional("WhitespaceSensitiveMacros", + Style.WhitespaceSensitiveMacros); } }; @@ -570,6 +618,8 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock); IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch); IO.mapOptional("BeforeElse", Wrapping.BeforeElse); + IO.mapOptional("BeforeLambdaBody", Wrapping.BeforeLambdaBody); + IO.mapOptional("BeforeWhile", Wrapping.BeforeWhile); IO.mapOptional("IndentBraces", Wrapping.IndentBraces); IO.mapOptional("SplitEmptyFunction", Wrapping.SplitEmptyFunction); IO.mapOptional("SplitEmptyRecord", Wrapping.SplitEmptyRecord); @@ -643,6 +693,8 @@ 
std::string ParseErrorCategory::message(int EV) const { return "Invalid argument"; case ParseError::Unsuitable: return "Unsuitable"; + case ParseError::BinPackTrailingCommaConflict: + return "trailing comma insertion cannot be used with bin packing"; } llvm_unreachable("unexpected parse error"); } @@ -651,12 +703,24 @@ static FormatStyle expandPresets(const FormatStyle &Style) { if (Style.BreakBeforeBraces == FormatStyle::BS_Custom) return Style; FormatStyle Expanded = Style; - Expanded.BraceWrapping = {false, false, FormatStyle::BWACS_Never, - false, false, false, - false, false, false, - false, false, false, - false, true, true, - true}; + Expanded.BraceWrapping = {/*AfterCaseLabel=*/false, + /*AfterClass=*/false, + /*AfterControlStatement=*/FormatStyle::BWACS_Never, + /*AfterEnum=*/false, + /*AfterFunction=*/false, + /*AfterNamespace=*/false, + /*AfterObjCDeclaration=*/false, + /*AfterStruct=*/false, + /*AfterUnion=*/false, + /*AfterExternBlock=*/false, + /*BeforeCatch=*/false, + /*BeforeElse=*/false, + /*BeforeLambdaBody=*/false, + /*BeforeWhile=*/false, + /*IndentBraces=*/false, + /*SplitEmptyFunction=*/true, + /*SplitEmptyRecord=*/true, + /*SplitEmptyNamespace=*/true}; switch (Style.BreakBeforeBraces) { case FormatStyle::BS_Linux: Expanded.BraceWrapping.AfterClass = true; @@ -670,6 +734,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterStruct = true; Expanded.BraceWrapping.AfterUnion = true; Expanded.BraceWrapping.AfterExternBlock = true; + Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; Expanded.BraceWrapping.SplitEmptyFunction = true; Expanded.BraceWrapping.SplitEmptyRecord = false; break; @@ -689,6 +754,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterStruct = true; Expanded.BraceWrapping.AfterUnion = true; Expanded.BraceWrapping.AfterExternBlock = true; + Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; 
Expanded.BraceWrapping.BeforeCatch = true; Expanded.BraceWrapping.BeforeElse = true; break; @@ -702,16 +768,32 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterObjCDeclaration = true; Expanded.BraceWrapping.AfterStruct = true; Expanded.BraceWrapping.AfterExternBlock = true; + Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; Expanded.BraceWrapping.BeforeCatch = true; Expanded.BraceWrapping.BeforeElse = true; + Expanded.BraceWrapping.BeforeLambdaBody = true; break; case FormatStyle::BS_GNU: - Expanded.BraceWrapping = {true, true, FormatStyle::BWACS_Always, - true, true, true, - true, true, true, - true, true, true, - true, true, true, - true}; + Expanded.BraceWrapping = { + /*AfterCaseLabel=*/true, + /*AfterClass=*/true, + /*AfterControlStatement=*/FormatStyle::BWACS_Always, + /*AfterEnum=*/true, + /*AfterFunction=*/true, + /*AfterNamespace=*/true, + /*AfterObjCDeclaration=*/true, + /*AfterStruct=*/true, + /*AfterUnion=*/true, + /*AfterExternBlock=*/true, + /*BeforeCatch=*/true, + /*BeforeElse=*/true, + /*BeforeLambdaBody=*/false, + /*BeforeWhile=*/true, + /*IndentBraces=*/true, + /*SplitEmptyFunction=*/true, + /*SplitEmptyRecord=*/true, + /*SplitEmptyNamespace=*/true}; + Expanded.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; break; case FormatStyle::BS_WebKit: Expanded.BraceWrapping.AfterFunction = true; @@ -728,14 +810,16 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AccessModifierOffset = -2; LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right; LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; - LLVMStyle.AlignOperands = true; + LLVMStyle.AlignOperands = FormatStyle::OAS_Align; LLVMStyle.AlignTrailingComments = true; LLVMStyle.AlignConsecutiveAssignments = false; + LLVMStyle.AlignConsecutiveBitFields = false; LLVMStyle.AlignConsecutiveDeclarations = false; LLVMStyle.AlignConsecutiveMacros = false; LLVMStyle.AllowAllArgumentsOnNextLine = true; 
LLVMStyle.AllowAllConstructorInitializersOnNextLine = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; + LLVMStyle.AllowShortEnumsOnASingleLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Never; LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; @@ -751,12 +835,25 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; - LLVMStyle.BraceWrapping = {false, false, FormatStyle::BWACS_Never, - false, false, false, - false, false, false, - false, false, false, - false, true, true, - true}; + LLVMStyle.BraceWrapping = {/*AfterCaseLabel=*/false, + /*AfterClass=*/false, + /*AfterControlStatement=*/FormatStyle::BWACS_Never, + /*AfterEnum=*/false, + /*AfterFunction=*/false, + /*AfterNamespace=*/false, + /*AfterObjCDeclaration=*/false, + /*AfterStruct=*/false, + /*AfterUnion=*/false, + /*AfterExternBlock=*/false, + /*BeforeCatch=*/false, + /*BeforeElse=*/false, + /*BeforeLambdaBody=*/false, + /*BeforeWhile=*/false, + /*IndentBraces=*/false, + /*SplitEmptyFunction=*/true, + /*SplitEmptyRecord=*/true, + /*SplitEmptyNamespace=*/true}; + LLVMStyle.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; LLVMStyle.BreakAfterJavaFieldAnnotations = false; LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; LLVMStyle.BreakInheritanceList = FormatStyle::BILS_BeforeColon; @@ -782,10 +879,12 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.IncludeStyle.IncludeIsMainRegex = "(Test)?$"; LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; LLVMStyle.IndentCaseLabels = false; + LLVMStyle.IndentCaseBlocks = false; LLVMStyle.IndentGotoLabels = true; LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; LLVMStyle.IndentWrappedFunctionNames = false; 
LLVMStyle.IndentWidth = 2; + LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None; LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; LLVMStyle.JavaScriptWrapImports = true; LLVMStyle.TabWidth = 8; @@ -794,6 +893,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto; LLVMStyle.ObjCBlockIndentWidth = 2; + LLVMStyle.ObjCBreakBeforeNestedBlockParam = true; LLVMStyle.ObjCSpaceAfterProperty = false; LLVMStyle.ObjCSpaceBeforeProtocolList = true; LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; @@ -835,6 +935,9 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.SortUsingDeclarations = true; LLVMStyle.StatementMacros.push_back("Q_UNUSED"); LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION"); + LLVMStyle.WhitespaceSensitiveMacros.push_back("STRINGIZE"); + LLVMStyle.WhitespaceSensitiveMacros.push_back("PP_STRINGIZE"); + LLVMStyle.WhitespaceSensitiveMacros.push_back("BOOST_PP_STRINGIZE"); // Defaults that differ when not C++. 
if (Language == FormatStyle::LK_TableGen) { @@ -911,6 +1014,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { "PARSE_TEXT_PROTO", "ParseTextOrDie", "ParseTextProtoOrDie", + "ParseTestProto", + "ParsePartialTestProto", }, /*CanonicalDelimiter=*/"", /*BasedOnStyle=*/"google", @@ -924,7 +1029,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { if (Language == FormatStyle::LK_Java) { GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; - GoogleStyle.AlignOperands = false; + GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AlignTrailingComments = false; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never; @@ -935,13 +1040,18 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpacesBeforeTrailingComments = 1; } else if (Language == FormatStyle::LK_JavaScript) { GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; - GoogleStyle.AlignOperands = false; + GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; + // TODO: still under discussion whether to switch to SLS_All. + GoogleStyle.AllowShortLambdasOnASingleLine = FormatStyle::SLS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; - // taze:, triple slash directives (`/// <...`), @see, which is commonly - // followed by overlong URLs. - GoogleStyle.CommentPragmas = "(taze:|^/[ \t]*<|@see)"; + // taze:, triple slash directives (`/// <...`), tslint:, and @see, which is + // commonly followed by overlong URLs. + GoogleStyle.CommentPragmas = "(taze:|^/[ \t]*<|tslint:|@see)"; + // TODO: enable once decided, in particular re disabling bin packing. 
+ // https://google.github.io/styleguide/jsguide.html#features-arrays-trailing-comma + // GoogleStyle.InsertTrailingCommas = FormatStyle::TCS_Wrapped; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; @@ -966,6 +1076,12 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { // #imports, etc.) GoogleStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; + } else if (Language == FormatStyle::LK_CSharp) { + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; + GoogleStyle.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never; + GoogleStyle.BreakStringLiterals = false; + GoogleStyle.ColumnLimit = 100; + GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; } return GoogleStyle; @@ -1061,7 +1177,7 @@ FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; - Style.AlignOperands = false; + Style.AlignOperands = FormatStyle::OAS_DontAlign; Style.AlignTrailingComments = false; Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Empty; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; @@ -1110,9 +1226,12 @@ FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) { Style.BraceWrapping.AfterObjCDeclaration = true; Style.BraceWrapping.AfterStruct = true; Style.BraceWrapping.AfterExternBlock = true; + Style.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; Style.BraceWrapping.BeforeCatch = true; Style.BraceWrapping.BeforeElse = true; + Style.BraceWrapping.BeforeWhile = false; Style.PenaltyReturnTypeOnItsOwnLine = 1000; + Style.AllowShortEnumsOnASingleLine = false; Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; Style.AllowShortCaseLabelsOnASingleLine = false; Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never; @@ -1207,6 +1326,11 @@ std::error_code parseConfiguration(StringRef 
Text, FormatStyle *Style) { StyleSet.Add(std::move(DefaultStyle)); } *Style = *StyleSet.Get(Language); + if (Style->InsertTrailingCommas != FormatStyle::TCS_None && + Style->BinPackArguments) { + // See comment on FormatStyle::TCS_Wrapped. + return make_error_code(ParseError::BinPackTrailingCommaConflict); + } return make_error_code(ParseError::Success); } @@ -1462,6 +1586,75 @@ private: FormattingAttemptStatus *Status; }; +/// TrailingCommaInserter inserts trailing commas into container literals. +/// E.g.: +/// const x = [ +/// 1, +/// ]; +/// TrailingCommaInserter runs after formatting. To avoid causing a required +/// reformatting (and thus reflow), it never inserts a comma that'd exceed the +/// ColumnLimit. +/// +/// Because trailing commas disable binpacking of arrays, TrailingCommaInserter +/// is conceptually incompatible with bin packing. +class TrailingCommaInserter : public TokenAnalyzer { +public: + TrailingCommaInserter(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + + std::pair<tooling::Replacements, unsigned> + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); + tooling::Replacements Result; + insertTrailingCommas(AnnotatedLines, Result); + return {Result, 0}; + } + +private: + /// Inserts trailing commas in [] and {} initializers if they wrap over + /// multiple lines.
+ void insertTrailingCommas(SmallVectorImpl<AnnotatedLine *> &Lines, + tooling::Replacements &Result) { + for (AnnotatedLine *Line : Lines) { + insertTrailingCommas(Line->Children, Result); + if (!Line->Affected) + continue; + for (FormatToken *FormatTok = Line->First; FormatTok; + FormatTok = FormatTok->Next) { + if (FormatTok->NewlinesBefore == 0) + continue; + FormatToken *Matching = FormatTok->MatchingParen; + if (!Matching || !FormatTok->getPreviousNonComment()) + continue; + if (!(FormatTok->is(tok::r_square) && + Matching->is(TT_ArrayInitializerLSquare)) && + !(FormatTok->is(tok::r_brace) && Matching->is(TT_DictLiteral))) + continue; + FormatToken *Prev = FormatTok->getPreviousNonComment(); + if (Prev->is(tok::comma) || Prev->is(tok::semi)) + continue; + // getEndLoc is not reliably set during re-lexing, use text length + // instead. + SourceLocation Start = + Prev->Tok.getLocation().getLocWithOffset(Prev->TokenText.size()); + // If inserting a comma would push the code over the column limit, skip + // this location - it'd introduce an unstable formatting due to the + // required reflow. + unsigned ColumnNumber = + Env.getSourceManager().getSpellingColumnNumber(Start); + if (ColumnNumber > Style.ColumnLimit) + continue; + // Comma insertions cannot conflict with each other, and this pass has a + // clean set of Replacements, so the operation below cannot fail. + cantFail(Result.add( + tooling::Replacement(Env.getSourceManager(), Start, 0, ","))); + } + } + } +}; + // This class clean up the erroneous/redundant code around the given ranges in // file. 
class Cleaner : public TokenAnalyzer { @@ -1808,7 +2001,7 @@ private: << FormatTok->Tok.getLocation().printToString( SourceManager) << " token: " << FormatTok->TokenText << " token type: " - << getTokenTypeName(FormatTok->Type) << "\n"); + << getTokenTypeName(FormatTok->getType()) << "\n"); return true; } if (guessIsObjC(SourceManager, Line->Children, Keywords)) @@ -1951,8 +2144,7 @@ static void sortCppIncludes(const FormatStyle &Style, // enough as additional newlines might be added or removed across #include // blocks. This we handle below by generating the updated #include blocks and // comparing it to the original. - if (Indices.size() == Includes.size() && - std::is_sorted(Indices.begin(), Indices.end()) && + if (Indices.size() == Includes.size() && llvm::is_sorted(Indices) && Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Preserve) return; @@ -1973,8 +2165,8 @@ static void sortCppIncludes(const FormatStyle &Style, // If the #includes are out of order, we generate a single replacement fixing // the entire range of blocks. Otherwise, no replacement is generated. - if (replaceCRLF(result) == - replaceCRLF(Code.substr(IncludesBeginOffset, IncludesBlockSize))) + if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( + IncludesBeginOffset, IncludesBlockSize)))) return; auto Err = Replaces.add(tooling::Replacement( @@ -2142,8 +2334,8 @@ static void sortJavaImports(const FormatStyle &Style, // If the imports are out of order, we generate a single replacement fixing // the entire block. Otherwise, no replacement is generated.
- if (replaceCRLF(result) == - replaceCRLF(Code.substr(Imports.front().Offset, ImportsBlockSize))) + if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( + Imports.front().Offset, ImportsBlockSize)))) return; auto Err = Replaces.add(tooling::Replacement(FileName, Imports.front().Offset, @@ -2431,6 +2623,12 @@ reformat(const FormatStyle &Style, StringRef Code, return Formatter(Env, Expanded, Status).process(); }); + if (Style.Language == FormatStyle::LK_JavaScript && + Style.InsertTrailingCommas == FormatStyle::TCS_Wrapped) + Passes.emplace_back([&](const Environment &Env) { + return TrailingCommaInserter(Env, Expanded).process(); + }); + auto Env = std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn, NextStartColumn, LastStartColumn); @@ -2518,7 +2716,8 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus11 = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; - LangOpts.CPlusPlus2a = LexingStd >= FormatStyle::LS_Cpp20; + LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; + LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp(); @@ -2532,7 +2731,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { const char *StyleOptionHelpDescription = "Coding style, currently supports:\n" - " LLVM, Google, Chromium, Mozilla, WebKit.\n" + " LLVM, GNU, Google, Chromium, Microsoft, Mozilla, WebKit.\n" "Use -style=file to load style configuration from\n" ".clang-format file located in one of the parent\n" "directories of the source file (or current\n" diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 90d09064bb43..7d792974cd57 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -50,6 +50,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_half: case 
tok::kw_float: case tok::kw_double: + case tok::kw___bf16: case tok::kw__Float16: case tok::kw___float128: case tok::kw_wchar_t: @@ -84,8 +85,8 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, const FormatToken *LBrace = State.NextToken->Previous->getPreviousNonComment(); if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || - LBrace->Next->Type == TT_DesignatedInitializerPeriod) + LBrace->BlockKind == BK_Block || LBrace->getType() == TT_DictLiteral || + LBrace->Next->getType() == TT_DesignatedInitializerPeriod) return 0; // Calculate the number of code points we have to format this list. As the diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index e9cd327754ef..d4287f53fde3 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -54,6 +54,7 @@ namespace format { TYPE(InheritanceComma) \ TYPE(InlineASMBrace) \ TYPE(InlineASMColon) \ + TYPE(InlineASMSymbolicNameLSquare) \ TYPE(JavaAnnotation) \ TYPE(JsComputedPropertyName) \ TYPE(JsExponentiation) \ @@ -101,10 +102,20 @@ namespace format { TYPE(TrailingUnaryOperator) \ TYPE(TypenameMacro) \ TYPE(UnaryOperator) \ + TYPE(UntouchableMacroFunc) \ TYPE(CSharpStringLiteral) \ + TYPE(CSharpNamedArgumentColon) \ + TYPE(CSharpNullable) \ TYPE(CSharpNullCoalescing) \ + TYPE(CSharpNullConditional) \ + TYPE(CSharpNullConditionalLSquare) \ + TYPE(CSharpGenericTypeConstraint) \ + TYPE(CSharpGenericTypeConstraintColon) \ + TYPE(CSharpGenericTypeConstraintComma) \ TYPE(Unknown) +/// Determines the semantic type of a syntactic token, e.g. whether "<" is a +/// template opener or binary operator. enum TokenType { #define TYPE(X) TT_##X, LIST_TOKEN_TYPES @@ -172,6 +183,12 @@ struct FormatToken { /// before the token. 
bool MustBreakBefore = false; + /// Whether to not align across this token + /// + /// This happens for example when a preprocessor directive ended directly + /// before the token, but very rarely otherwise. + bool MustBreakAlignBefore = false; + /// The raw text of the token. /// /// Contains the raw token text without leading whitespace and without leading @@ -184,7 +201,10 @@ struct FormatToken { /// Contains the kind of block if this token is a brace. BraceBlockKind BlockKind = BK_Unknown; - TokenType Type = TT_Unknown; + /// Returns the token's type, e.g. whether "<" is a template opener or + /// binary operator. + TokenType getType() const { return Type; } + void setType(TokenType T) { Type = T; } /// The number of spaces that should be inserted before this token. unsigned SpacesRequiredBefore = 0; @@ -504,6 +524,9 @@ struct FormatToken { /// Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { + // C# Does not indent object initialisers as continuations. 
+ if (is(tok::l_brace) && BlockKind == BK_BracedInit && Style.isCSharp()) + return true; if (is(TT_TemplateString) && opensScope()) return true; return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || @@ -579,6 +602,8 @@ private: return Previous->endsSequenceInternal(K1, Tokens...); return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); } + + TokenType Type = TT_Unknown; }; class ContinuationIndenter; @@ -770,6 +795,8 @@ struct AdditionalKeywords { kw_unchecked = &IdentTable.get("unchecked"); kw_unsafe = &IdentTable.get("unsafe"); kw_ushort = &IdentTable.get("ushort"); + kw_when = &IdentTable.get("when"); + kw_where = &IdentTable.get("where"); // Keep this at the end of the constructor to make sure everything here // is @@ -786,7 +813,8 @@ struct AdditionalKeywords { kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, - kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, + kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, kw_when, + kw_where, // Keywords from the JavaScript section. kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, @@ -890,13 +918,77 @@ struct AdditionalKeywords { IdentifierInfo *kw_unchecked; IdentifierInfo *kw_unsafe; IdentifierInfo *kw_ushort; + IdentifierInfo *kw_when; + IdentifierInfo *kw_where; /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. - bool IsJavaScriptIdentifier(const FormatToken &Tok) const { - return Tok.is(tok::identifier) && - JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == - JsExtraKeywords.end(); + /// If \c AcceptIdentifierName is true, returns true not only for keywords, + // but also for IdentifierName tokens (aka pseudo-keywords), such as + // ``yield``. 
+ bool IsJavaScriptIdentifier(const FormatToken &Tok, + bool AcceptIdentifierName = true) const { + // Based on the list of JavaScript & TypeScript keywords here: + // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74 + switch (Tok.Tok.getKind()) { + case tok::kw_break: + case tok::kw_case: + case tok::kw_catch: + case tok::kw_class: + case tok::kw_continue: + case tok::kw_const: + case tok::kw_default: + case tok::kw_delete: + case tok::kw_do: + case tok::kw_else: + case tok::kw_enum: + case tok::kw_export: + case tok::kw_false: + case tok::kw_for: + case tok::kw_if: + case tok::kw_import: + case tok::kw_module: + case tok::kw_new: + case tok::kw_private: + case tok::kw_protected: + case tok::kw_public: + case tok::kw_return: + case tok::kw_static: + case tok::kw_switch: + case tok::kw_this: + case tok::kw_throw: + case tok::kw_true: + case tok::kw_try: + case tok::kw_typeof: + case tok::kw_void: + case tok::kw_while: + // These are JS keywords that are lexed by LLVM/clang as keywords. + return false; + case tok::identifier: { + // For identifiers, make sure they are true identifiers, excluding the + // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). + bool IsPseudoKeyword = + JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != + JsExtraKeywords.end(); + return AcceptIdentifierName || !IsPseudoKeyword; + } + default: + // Other keywords are handled in the switch below, to avoid problems due + // to duplicate case labels when using the #include trick. + break; + } + + switch (Tok.Tok.getKind()) { + // Handle C++ keywords not included above: these are all JS identifiers. +#define KEYWORD(X, Y) case tok::kw_##X: +#include "clang/Basic/TokenKinds.def" + // #undef KEYWORD is not needed -- it's #undef-ed at the end of + // TokenKinds.def + return true; + default: + // All other tokens (punctuation etc) are not JS identifiers. 
+ return false; + } } /// Returns \c true if \p Tok is a C# keyword, returns diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index ef20ba884fb3..1fd153d1112e 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -22,13 +22,15 @@ namespace clang { namespace format { -FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - unsigned Column, const FormatStyle &Style, - encoding::Encoding Encoding) +FormatTokenLexer::FormatTokenLexer( + const SourceManager &SourceMgr, FileID ID, unsigned Column, + const FormatStyle &Style, encoding::Encoding Encoding, + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, + IdentifierTable &IdentTable) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), - Style(Style), IdentTable(getFormattingLangOpts(Style)), - Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), + Style(Style), IdentTable(IdentTable), Keywords(IdentTable), + Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, @@ -43,6 +45,11 @@ FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, Macros.insert({&IdentTable.get(TypenameMacro), TT_TypenameMacro}); for (const std::string &NamespaceMacro : Style.NamespaceMacros) Macros.insert({&IdentTable.get(NamespaceMacro), TT_NamespaceMacro}); + for (const std::string &WhitespaceSensitiveMacro : + Style.WhitespaceSensitiveMacros) { + Macros.insert( + {&IdentTable.get(WhitespaceSensitiveMacro), TT_UntouchableMacroFunc}); + } } ArrayRef<FormatToken *> FormatTokenLexer::lex() { @@ -57,6 +64,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { if (Style.Language == FormatStyle::LK_TextProto) tryParsePythonComment(); 
tryMergePreviousTokens(); + if (Style.isCSharp()) + // This needs to come after tokens have been merged so that C# + // string literals are correctly identified. + handleCSharpVerbatimAndInterpolatedStrings(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); @@ -70,15 +81,19 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeLessLess()) return; + if (tryMergeForEach()) + return; + if (Style.isCpp() && tryTransformTryUsageForC()) + return; if (Style.isCSharp()) { if (tryMergeCSharpKeywordVariables()) return; - if (tryMergeCSharpVerbatimStringLiteral()) + if (tryMergeCSharpStringLiteral()) return; if (tryMergeCSharpDoubleQuestion()) return; - if (tryMergeCSharpNullConditionals()) + if (tryMergeCSharpNullConditional()) return; if (tryTransformCSharpForEach()) return; @@ -120,8 +135,11 @@ void FormatTokenLexer::tryMergePreviousTokens() { Tokens.back()->Tok.setKind(tok::starequal); return; } - if (tryMergeTokens(JSNullishOperator, TT_JsNullishCoalescingOperator)) + if (tryMergeTokens(JSNullishOperator, TT_JsNullishCoalescingOperator)) { + // Treat like the "||" operator (as opposed to the ternary ?). + Tokens.back()->Tok.setKind(tok::pipepipe); return; + } if (tryMergeTokens(JSNullPropagatingOperator, TT_JsNullPropagatingOperator)) { // Treat like a regular "." access. 
@@ -151,7 +169,7 @@ bool FormatTokenLexer::tryMergeNSStringLiteral() { At->TokenText = StringRef(At->TokenText.begin(), String->TokenText.end() - At->TokenText.begin()); At->ColumnWidth += String->ColumnWidth; - At->Type = TT_ObjCStringLiteral; + At->setType(TT_ObjCStringLiteral); Tokens.erase(Tokens.end() - 1); return true; } @@ -170,7 +188,7 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() { StringRef(Hash->TokenText.begin(), Identifier->TokenText.end() - Hash->TokenText.begin()); Hash->ColumnWidth += Identifier->ColumnWidth; - Hash->Type = TT_JsPrivateIdentifier; + Hash->setType(TT_JsPrivateIdentifier); Tokens.erase(Tokens.end() - 1); return true; } @@ -178,18 +196,71 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() { // Search for verbatim or interpolated string literals @"ABC" or // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to // prevent splitting of @, $ and ". -bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() { +// Merging of multiline verbatim strings with embedded '"' is handled in +// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing. +bool FormatTokenLexer::tryMergeCSharpStringLiteral() { if (Tokens.size() < 2) return false; - auto &At = *(Tokens.end() - 2); + + // Interpolated strings could contain { } with " characters inside. + // $"{x ?? "null"}" + // should not be split into $"{x ?? ", null, "}" but should treated as a + // single string-literal. + // + // We opt not to try and format expressions inside {} within a C# + // interpolated string. Formatting expressions within an interpolated string + // would require similar work as that done for JavaScript template strings + // in `handleTemplateStrings()`. 
+ auto &CSharpInterpolatedString = *(Tokens.end() - 2); + if (CSharpInterpolatedString->getType() == TT_CSharpStringLiteral && + (CSharpInterpolatedString->TokenText.startswith(R"($")") || + CSharpInterpolatedString->TokenText.startswith(R"($@")"))) { + int UnmatchedOpeningBraceCount = 0; + + auto TokenTextSize = CSharpInterpolatedString->TokenText.size(); + for (size_t Index = 0; Index < TokenTextSize; ++Index) { + char C = CSharpInterpolatedString->TokenText[Index]; + if (C == '{') { + // "{{" inside an interpolated string is an escaped '{' so skip it. + if (Index + 1 < TokenTextSize && + CSharpInterpolatedString->TokenText[Index + 1] == '{') { + ++Index; + continue; + } + ++UnmatchedOpeningBraceCount; + } else if (C == '}') { + // "}}" inside an interpolated string is an escaped '}' so skip it. + if (Index + 1 < TokenTextSize && + CSharpInterpolatedString->TokenText[Index + 1] == '}') { + ++Index; + continue; + } + --UnmatchedOpeningBraceCount; + } + } + + if (UnmatchedOpeningBraceCount > 0) { + auto &NextToken = *(Tokens.end() - 1); + CSharpInterpolatedString->TokenText = + StringRef(CSharpInterpolatedString->TokenText.begin(), + NextToken->TokenText.end() - + CSharpInterpolatedString->TokenText.begin()); + CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth; + Tokens.erase(Tokens.end() - 1); + return true; + } + } + + // Look for @"aaaaaa" or $"aaaaaa". auto &String = *(Tokens.end() - 1); + if (!String->is(tok::string_literal)) + return false; - // Look for $"aaaaaa" @"aaaaaa". - if (!(At->is(tok::at) || At->TokenText == "$") || - !String->is(tok::string_literal)) + auto &At = *(Tokens.end() - 2); + if (!(At->is(tok::at) || At->TokenText == "$")) return false; - if (Tokens.size() >= 2 && At->is(tok::at)) { + if (Tokens.size() > 2 && At->is(tok::at)) { auto &Dollar = *(Tokens.end() - 3); if (Dollar->TokenText == "$") { // This looks like $@"aaaaa" so we need to combine all 3 tokens. 
@@ -198,7 +269,7 @@ bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() { StringRef(Dollar->TokenText.begin(), String->TokenText.end() - Dollar->TokenText.begin()); Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth); - Dollar->Type = TT_CSharpStringLiteral; + Dollar->setType(TT_CSharpStringLiteral); Tokens.erase(Tokens.end() - 2); Tokens.erase(Tokens.end() - 1); return true; @@ -210,11 +281,18 @@ bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() { At->TokenText = StringRef(At->TokenText.begin(), String->TokenText.end() - At->TokenText.begin()); At->ColumnWidth += String->ColumnWidth; - At->Type = TT_CSharpStringLiteral; + At->setType(TT_CSharpStringLiteral); Tokens.erase(Tokens.end() - 1); return true; } +// Valid C# attribute targets: +// https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/concepts/attributes/#attribute-targets +const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = { + "assembly", "module", "field", "event", "method", + "param", "property", "return", "type", +}; + bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() { if (Tokens.size() < 2) return false; @@ -222,12 +300,38 @@ bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() { auto &SecondQuestion = *(Tokens.end() - 1); if (!FirstQuestion->is(tok::question) || !SecondQuestion->is(tok::question)) return false; - FirstQuestion->Tok.setKind(tok::question); + FirstQuestion->Tok.setKind(tok::question); // no '??' in clang tokens. FirstQuestion->TokenText = StringRef(FirstQuestion->TokenText.begin(), SecondQuestion->TokenText.end() - FirstQuestion->TokenText.begin()); FirstQuestion->ColumnWidth += SecondQuestion->ColumnWidth; - FirstQuestion->Type = TT_CSharpNullCoalescing; + FirstQuestion->setType(TT_CSharpNullCoalescing); + Tokens.erase(Tokens.end() - 1); + return true; +} + +// Merge '?[' and '?.' pairs into single tokens. 
+bool FormatTokenLexer::tryMergeCSharpNullConditional() { + if (Tokens.size() < 2) + return false; + auto &Question = *(Tokens.end() - 2); + auto &PeriodOrLSquare = *(Tokens.end() - 1); + if (!Question->is(tok::question) || + !PeriodOrLSquare->isOneOf(tok::l_square, tok::period)) + return false; + Question->TokenText = + StringRef(Question->TokenText.begin(), + PeriodOrLSquare->TokenText.end() - Question->TokenText.begin()); + Question->ColumnWidth += PeriodOrLSquare->ColumnWidth; + + if (PeriodOrLSquare->is(tok::l_square)) { + Question->Tok.setKind(tok::question); // no '?[' in clang tokens. + Question->setType(TT_CSharpNullConditionalLSquare); + } else { + Question->Tok.setKind(tok::question); // no '?.' in clang tokens. + Question->setType(TT_CSharpNullConditional); + } + Tokens.erase(Tokens.end() - 1); return true; } @@ -246,24 +350,7 @@ bool FormatTokenLexer::tryMergeCSharpKeywordVariables() { At->TokenText = StringRef(At->TokenText.begin(), Keyword->TokenText.end() - At->TokenText.begin()); At->ColumnWidth += Keyword->ColumnWidth; - At->Type = Keyword->Type; - Tokens.erase(Tokens.end() - 1); - return true; -} - -// In C# merge the Identifier and the ? together e.g. arg?. 
-bool FormatTokenLexer::tryMergeCSharpNullConditionals() { - if (Tokens.size() < 2) - return false; - auto &Identifier = *(Tokens.end() - 2); - auto &Question = *(Tokens.end() - 1); - if (!Identifier->isOneOf(tok::r_square, tok::identifier) || - !Question->is(tok::question)) - return false; - Identifier->TokenText = - StringRef(Identifier->TokenText.begin(), - Question->TokenText.end() - Identifier->TokenText.begin()); - Identifier->ColumnWidth += Question->ColumnWidth; + At->setType(Keyword->getType()); Tokens.erase(Tokens.end() - 1); return true; } @@ -278,11 +365,53 @@ bool FormatTokenLexer::tryTransformCSharpForEach() { if (Identifier->TokenText != "foreach") return false; - Identifier->Type = TT_ForEachMacro; + Identifier->setType(TT_ForEachMacro); Identifier->Tok.setKind(tok::kw_for); return true; } +bool FormatTokenLexer::tryMergeForEach() { + if (Tokens.size() < 2) + return false; + auto &For = *(Tokens.end() - 2); + auto &Each = *(Tokens.end() - 1); + if (!For->is(tok::kw_for)) + return false; + if (!Each->is(tok::identifier)) + return false; + if (Each->TokenText != "each") + return false; + + For->setType(TT_ForEachMacro); + For->Tok.setKind(tok::kw_for); + + For->TokenText = StringRef(For->TokenText.begin(), + Each->TokenText.end() - For->TokenText.begin()); + For->ColumnWidth += Each->ColumnWidth; + Tokens.erase(Tokens.end() - 1); + return true; +} + +bool FormatTokenLexer::tryTransformTryUsageForC() { + if (Tokens.size() < 2) + return false; + auto &Try = *(Tokens.end() - 2); + if (!Try->is(tok::kw_try)) + return false; + auto &Next = *(Tokens.end() - 1); + if (Next->isOneOf(tok::l_brace, tok::colon)) + return false; + + if (Tokens.size() > 2) { + auto &At = *(Tokens.end() - 3); + if (At->is(tok::at)) + return false; + } + + Try->Tok.setKind(tok::identifier); + return true; +} + bool FormatTokenLexer::tryMergeLessLess() { // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. 
if (Tokens.size() < 3) @@ -329,7 +458,7 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; - First[0]->Type = NewType; + First[0]->setType(NewType); return true; } @@ -418,7 +547,7 @@ void FormatTokenLexer::tryParseJSRegexLiteral() { } } - RegexToken->Type = TT_RegexLiteral; + RegexToken->setType(TT_RegexLiteral); // Treat regex literals like other string_literals. RegexToken->Tok.setKind(tok::string_literal); RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin); @@ -427,6 +556,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } +void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { + FormatToken *CSharpStringLiteral = Tokens.back(); + + if (CSharpStringLiteral->getType() != TT_CSharpStringLiteral) + return; + + // Deal with multiline strings. + if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") || + CSharpStringLiteral->TokenText.startswith(R"($@")"))) + return; + + const char *StrBegin = + Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size(); + const char *Offset = StrBegin; + if (CSharpStringLiteral->TokenText.startswith(R"(@")")) + Offset += 2; + else // CSharpStringLiteral->TokenText.startswith(R"($@")") + Offset += 3; + + // Look for a terminating '"' in the current file buffer. + // Make no effort to format code within an interpolated or verbatim string. + for (; Offset != Lex->getBuffer().end(); ++Offset) { + if (Offset[0] == '"') { + // "" within a verbatim string is an escaped double quote: skip it. + if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"') + ++Offset; + else + break; + } + } + + // Make no attempt to format code properly if a verbatim string is + // unterminated. 
+ if (Offset == Lex->getBuffer().end()) + return; + + StringRef LiteralText(StrBegin, Offset - StrBegin + 1); + CSharpStringLiteral->TokenText = LiteralText; + + // Adjust width for potentially multiline string literals. + size_t FirstBreak = LiteralText.find('\n'); + StringRef FirstLineText = FirstBreak == StringRef::npos + ? LiteralText + : LiteralText.substr(0, FirstBreak); + CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs( + FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth, + Encoding); + size_t LastBreak = LiteralText.rfind('\n'); + if (LastBreak != StringRef::npos) { + CSharpStringLiteral->IsMultiline = true; + unsigned StartColumn = 0; + CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs( + LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn, + Style.TabWidth, Encoding); + } + + SourceLocation loc = Offset < Lex->getBuffer().end() + ? Lex->getSourceLocation(Offset + 1) + : SourceMgr.getLocForEndOfFile(ID); + resetLexer(SourceMgr.getFileOffset(loc)); +} + void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); @@ -468,7 +659,7 @@ void FormatTokenLexer::handleTemplateStrings() { } StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); - BacktickToken->Type = TT_TemplateString; + BacktickToken->setType(TT_TemplateString); BacktickToken->Tok.setKind(tok::string_literal); BacktickToken->TokenText = LiteralText; @@ -506,7 +697,7 @@ void FormatTokenLexer::tryParsePythonComment() { if (To == StringRef::npos) To = Lex->getBuffer().size(); size_t Len = To - From; - HashToken->Type = TT_LineComment; + HashToken->setType(TT_LineComment); HashToken->Tok.setKind(tok::comment); HashToken->TokenText = Lex->getBuffer().substr(From, Len); SourceLocation Loc = To < Lex->getBuffer().size() @@ -604,7 +795,7 @@ bool FormatTokenLexer::tryMergeConflictMarkers() { // We do not need to build a complete token here, as we will skip it // during parsing anyway (as we must not 
touch whitespace around conflict // markers). - Tokens.back()->Type = Type; + Tokens.back()->setType(Type); Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); Tokens.push_back(Next); @@ -691,13 +882,13 @@ FormatToken *FormatTokenLexer::getNextToken() { break; case '\\': if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) - FormatTok->Type = TT_ImplicitStringLiteral; + FormatTok->setType(TT_ImplicitStringLiteral); break; default: - FormatTok->Type = TT_ImplicitStringLiteral; + FormatTok->setType(TT_ImplicitStringLiteral); break; } - if (FormatTok->Type == TT_ImplicitStringLiteral) + if (FormatTok->getType() == TT_ImplicitStringLiteral) break; } @@ -825,12 +1016,12 @@ FormatToken *FormatTokenLexer::getNextToken() { Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && it != Macros.end()) { - FormatTok->Type = it->second; + FormatTok->setType(it->second); } else if (FormatTok->is(tok::identifier)) { if (MacroBlockBeginRegex.match(Text)) { - FormatTok->Type = TT_MacroBlockBegin; + FormatTok->setType(TT_MacroBlockBegin); } else if (MacroBlockEndRegex.match(Text)) { - FormatTok->Type = TT_MacroBlockEnd; + FormatTok->setType(TT_MacroBlockEnd); } } } diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 611211be055a..6b08677e3369 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -21,6 +21,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Regex.h" #include <stack> @@ -37,7 +38,9 @@ enum LexerState { class FormatTokenLexer { public: FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, - const FormatStyle &Style, encoding::Encoding Encoding); + const FormatStyle &Style, encoding::Encoding Encoding, + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, + IdentifierTable &IdentTable); ArrayRef<FormatToken *> lex(); 
@@ -49,11 +52,13 @@ private: bool tryMergeLessLess(); bool tryMergeNSStringLiteral(); bool tryMergeJSPrivateIdentifier(); - bool tryMergeCSharpVerbatimStringLiteral(); + bool tryMergeCSharpStringLiteral(); bool tryMergeCSharpKeywordVariables(); - bool tryMergeCSharpNullConditionals(); bool tryMergeCSharpDoubleQuestion(); + bool tryMergeCSharpNullConditional(); bool tryTransformCSharpForEach(); + bool tryMergeForEach(); + bool tryTransformTryUsageForC(); bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); @@ -79,6 +84,8 @@ private: // nested template parts by balancing curly braces. void handleTemplateStrings(); + void handleCSharpVerbatimAndInterpolatedStrings(); + void tryParsePythonComment(); bool tryMerge_TMacro(); @@ -98,10 +105,10 @@ private: const SourceManager &SourceMgr; FileID ID; const FormatStyle &Style; - IdentifierTable IdentTable; + IdentifierTable &IdentTable; AdditionalKeywords Keywords; encoding::Encoding Encoding; - llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; // Index (in 'Tokens') of the last token that starts a new line. unsigned FirstInLineIndex; SmallVector<FormatToken *, 16> Tokens; @@ -113,6 +120,9 @@ private: llvm::Regex MacroBlockBeginRegex; llvm::Regex MacroBlockEndRegex; + // Targets that may appear inside a C# attribute. + static const llvm::StringSet<> CSharpAttributeTargets; + void readRawToken(FormatToken &Tok); void resetLexer(unsigned Offset); diff --git a/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/clang/lib/Format/NamespaceEndCommentsFixer.cpp index 20b424f86077..97de45bd1965 100644 --- a/clang/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/clang/lib/Format/NamespaceEndCommentsFixer.cpp @@ -121,7 +121,25 @@ bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName, // Named namespace comments must not mention anonymous namespace. 
if (!NamespaceName.empty() && !AnonymousInComment.empty()) return false; - return NamespaceNameInComment == NamespaceName; + if (NamespaceNameInComment == NamespaceName) + return true; + + // Has namespace comment flowed onto the next line. + // } // namespace + // // verylongnamespacenamethatdidnotfitonthepreviouscommentline + if (!(Comment->Next && Comment->Next->is(TT_LineComment))) + return false; + + static const llvm::Regex CommentPattern = llvm::Regex( + "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase); + + // Pull out just the comment text. + if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) { + return false; + } + NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : ""; + + return (NamespaceNameInComment == NamespaceName); } void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, @@ -187,6 +205,23 @@ std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( const SourceManager &SourceMgr = Env.getSourceManager(); AffectedRangeMgr.computeAffectedLines(AnnotatedLines); tooling::Replacements Fixes; + + // Spin through the lines and ensure we have balanced braces. + int Braces = 0; + for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { + FormatToken *Tok = AnnotatedLines[I]->First; + while (Tok) { + Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; + Tok = Tok->Next; + } + } + // Don't attempt to comment unbalanced braces or this can + // lead to comments being placed on the closing brace which isn't + // the matching brace of the namespace. (occurs during incomplete editing). 
+ if (Braces != 0) { + return {Fixes, 0}; + } + std::string AllNamespaceNames = ""; size_t StartLineIndex = SIZE_MAX; StringRef NamespaceTokenText; diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index 5be243f4c07a..db2b65b08898 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -144,7 +144,7 @@ public: llvm::stable_sort(Indices, [&](unsigned LHSI, unsigned RHSI) { return References[LHSI] < References[RHSI]; }); - bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end()); + bool ReferencesInOrder = llvm::is_sorted(Indices); std::string ReferencesText; bool SymbolsInOrder = true; diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp index eb98a205d526..f1459a808ff8 100644 --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -64,11 +64,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), - Env.getFirstStartColumn(), Style, Encoding); + llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; + IdentifierTable IdentTable(getFormattingLangOpts(Style)); + FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), + Env.getFirstStartColumn(), Style, Encoding, Allocator, - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), - Env.getFirstStartColumn(), Tokens.lex(), *this); + IdentTable); + ArrayRef<FormatToken *> Toks(Lex.lex()); + SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end()); + UnwrappedLineParser Parser(Style, Lex.getKeywords(), + Env.getFirstStartColumn(), Tokens, *this); Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); unsigned Penalty = 0; @@ -76,14 +81,14 @@ std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { LLVM_DEBUG(llvm::dbgs() << 
"Run " << Run << "...\n"); SmallVector<AnnotatedLine *, 16> AnnotatedLines; - TokenAnnotator Annotator(Style, Tokens.getKeywords()); + TokenAnnotator Annotator(Style, Lex.getKeywords()); for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); Annotator.annotate(*AnnotatedLines.back()); } std::pair<tooling::Replacements, unsigned> RunResult = - analyze(Annotator, AnnotatedLines, Tokens); + analyze(Annotator, AnnotatedLines, Lex); LLVM_DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index d5d394e61926..7f8e35126512 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -118,9 +118,9 @@ private: if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Left->Previous && Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) - CurrentToken->Type = TT_DictLiteral; + CurrentToken->setType(TT_DictLiteral); else - CurrentToken->Type = TT_TemplateCloser; + CurrentToken->setType(TT_TemplateCloser); next(); return true; } @@ -131,7 +131,7 @@ private: } if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && - Style.Language != FormatStyle::LK_Proto && + !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto && Style.Language != FormatStyle::LK_TextProto)) return false; // If a && or || is found and interpreted as a binary operator, this set @@ -151,7 +151,7 @@ private: if (CurrentToken->is(tok::colon) || (CurrentToken->isOneOf(tok::l_brace, tok::less) && Previous->isNot(tok::colon))) - Previous->Type = TT_SelectorName; + Previous->setType(TT_SelectorName); } } if (!consumeToken()) @@ -160,6 +160,27 @@ private: return false; } + bool parseUntouchableParens() { + while (CurrentToken) { + CurrentToken->Finalized = true; + switch 
(CurrentToken->Tok.getKind()) { + case tok::l_paren: + next(); + if (!parseUntouchableParens()) + return false; + continue; + case tok::r_paren: + next(); + return true; + default: + // no-op + break; + } + next(); + } + return false; + } + bool parseParens(bool LookForDecls = false) { if (!CurrentToken) return false; @@ -171,6 +192,11 @@ private: Contexts.back().ColonIsForRangeExpr = Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; + if (Left->Previous && Left->Previous->is(TT_UntouchableMacroFunc)) { + Left->Finalized = true; + return parseUntouchableParens(); + } + bool StartsObjCMethodExpr = false; if (FormatToken *MaybeSel = Left->Previous) { // @selector( starts a selector. @@ -217,7 +243,7 @@ private: // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { - Left->Type = TT_AttributeParen; + Left->setType(TT_AttributeParen); } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. Contexts.back().IsForEachMacro = true; @@ -233,7 +259,7 @@ private: if (StartsObjCMethodExpr) { Contexts.back().ColonIsObjCMethodExpr = true; - Left->Type = TT_ObjCMethodExpr; + Left->setType(TT_ObjCMethodExpr); } // MightBeFunctionType and ProbablyFunctionType are used for @@ -264,7 +290,7 @@ private: if (PrevPrev && PrevPrev->is(tok::identifier) && Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { - Prev->Type = TT_BinaryOperator; + Prev->setType(TT_BinaryOperator); LookForDecls = false; } } @@ -282,8 +308,8 @@ private: if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) - Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen - : TT_FunctionTypeLParen; + Left->setType(Left->Next->is(tok::caret) ? 
TT_ObjCBlockLParen + : TT_FunctionTypeLParen); Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -295,12 +321,12 @@ private: for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { if (Tok->is(TT_BinaryOperator) && Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) - Tok->Type = TT_PointerOrReference; + Tok->setType(TT_PointerOrReference); } } if (StartsObjCMethodExpr) { - CurrentToken->Type = TT_ObjCMethodExpr; + CurrentToken->setType(TT_ObjCMethodExpr); if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; @@ -308,13 +334,13 @@ private: } if (Left->is(TT_AttributeParen)) - CurrentToken->Type = TT_AttributeParen; + CurrentToken->setType(TT_AttributeParen); if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) - CurrentToken->Type = TT_JavaAnnotation; + CurrentToken->setType(TT_JavaAnnotation); if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) - CurrentToken->Type = TT_LeadingJavaAnnotation; + CurrentToken->setType(TT_LeadingJavaAnnotation); if (Left->Previous && Left->Previous->is(TT_AttributeSquare)) - CurrentToken->Type = TT_AttributeSquare; + CurrentToken->setType(TT_AttributeSquare); if (!HasMultipleLines) Left->PackingKind = PPK_Inconclusive; @@ -330,7 +356,7 @@ private: return false; if (CurrentToken->is(tok::l_brace)) - Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen + Left->setType(TT_Unknown); // Not TT_ObjCBlockLParen if (CurrentToken->is(tok::comma) && CurrentToken->Next && !CurrentToken->Next->HasUnescapedNewline && !CurrentToken->Next->isTrailingComment()) @@ -342,13 +368,13 @@ private: if (CurrentToken->isOneOf(tok::semi, tok::colon)) { MightBeObjCForRangeLoop = false; if (PossibleObjCForInToken) { - PossibleObjCForInToken->Type = TT_Unknown; + PossibleObjCForInToken->setType(TT_Unknown); PossibleObjCForInToken = nullptr; } } if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) { 
PossibleObjCForInToken = CurrentToken; - PossibleObjCForInToken->Type = TT_ObjCForIn; + PossibleObjCForInToken->setType(TT_ObjCForIn); } // When we discover a 'new', we set CanBeExpression to 'false' in order to // parse the type correctly. Reset that after a comma. @@ -369,6 +395,17 @@ private: if (!Style.isCSharp()) return false; + // `identifier[i]` is not an attribute. + if (Tok.Previous && Tok.Previous->is(tok::identifier)) + return false; + + // Chains of [] in `identifier[i][j][k]` are not attributes. + if (Tok.Previous && Tok.Previous->is(tok::r_square)) { + auto *MatchingParen = Tok.Previous->MatchingParen; + if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare)) + return false; + } + const FormatToken *AttrTok = Tok.Next; if (!AttrTok) return false; @@ -385,15 +422,15 @@ private: if (!AttrTok) return false; - // Move past the end of ']'. + // Allow an attribute to be the only content of a file. AttrTok = AttrTok->Next; if (!AttrTok) - return false; + return true; // Limit this to being an access modifier that follows. if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected, - tok::kw_class, tok::kw_static, tok::l_square, - Keywords.kw_internal)) { + tok::comment, tok::kw_class, tok::kw_static, + tok::l_square, Keywords.kw_internal)) { return true; } @@ -460,7 +497,7 @@ private: Contexts.back().InCpp11AttributeSpecifier; // Treat C# Attributes [STAThread] much like C++ attributes [[...]]. 
- bool IsCSharp11AttributeSpecifier = + bool IsCSharpAttributeSpecifier = isCSharpAttributeSpecifier(*Left) || Contexts.back().InCSharpAttributeSpecifier; @@ -469,7 +506,8 @@ private: bool StartsObjCMethodExpr = !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates && Style.isCpp() && !IsCpp11AttributeSpecifier && - Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && + !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression && + Left->isNot(TT_LambdaLSquare) && !CurrentToken->isOneOf(tok::l_brace, tok::r_square) && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, @@ -483,24 +521,26 @@ private: unsigned BindingIncrease = 1; if (IsCppStructuredBinding) { - Left->Type = TT_StructuredBindingLSquare; + Left->setType(TT_StructuredBindingLSquare); } else if (Left->is(TT_Unknown)) { if (StartsObjCMethodExpr) { - Left->Type = TT_ObjCMethodExpr; + Left->setType(TT_ObjCMethodExpr); + } else if (InsideInlineASM) { + Left->setType(TT_InlineASMSymbolicNameLSquare); } else if (IsCpp11AttributeSpecifier) { - Left->Type = TT_AttributeSquare; + Left->setType(TT_AttributeSquare); } else if (Style.Language == FormatStyle::LK_JavaScript && Parent && Contexts.back().ContextKind == tok::l_brace && Parent->isOneOf(tok::l_brace, tok::comma)) { - Left->Type = TT_JsComputedPropertyName; + Left->setType(TT_JsComputedPropertyName); } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace && Parent && Parent->isOneOf(tok::l_brace, tok::comma)) { - Left->Type = TT_DesignatedInitializerLSquare; - } else if (IsCSharp11AttributeSpecifier) { - Left->Type = TT_AttributeSquare; + Left->setType(TT_DesignatedInitializerLSquare); + } else if (IsCSharpAttributeSpecifier) { + Left->setType(TT_AttributeSquare); } else if (CurrentToken->is(tok::r_square) && Parent && Parent->is(TT_TemplateCloser)) { - Left->Type = TT_ArraySubscriptLSquare; + Left->setType(TT_ArraySubscriptLSquare); } else if (Style.Language == FormatStyle::LK_Proto || 
Style.Language == FormatStyle::LK_TextProto) { // Square braces in LK_Proto can either be message field attributes: @@ -529,13 +569,13 @@ private: // // In the first and the third case we want to spread the contents inside // the square braces; in the second we want to keep them inline. - Left->Type = TT_ArrayInitializerLSquare; + Left->setType(TT_ArrayInitializerLSquare); if (!Left->endsSequence(tok::l_square, tok::numeric_constant, tok::equal) && !Left->endsSequence(tok::l_square, tok::numeric_constant, tok::identifier) && !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) { - Left->Type = TT_ProtoExtensionLSquare; + Left->setType(TT_ProtoExtensionLSquare); BindingIncrease = 10; } } else if (!CppArrayTemplates && Parent && @@ -544,10 +584,10 @@ private: tok::question, tok::colon, tok::kw_return, // Should only be relevant to JavaScript: tok::kw_default)) { - Left->Type = TT_ArrayInitializerLSquare; + Left->setType(TT_ArrayInitializerLSquare); } else { BindingIncrease = 10; - Left->Type = TT_ArraySubscriptLSquare; + Left->setType(TT_ArraySubscriptLSquare); } } @@ -559,14 +599,14 @@ private: Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier; - Contexts.back().InCSharpAttributeSpecifier = IsCSharp11AttributeSpecifier; + Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier; while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { if (IsCpp11AttributeSpecifier) - CurrentToken->Type = TT_AttributeSquare; - if (IsCSharp11AttributeSpecifier) - CurrentToken->Type = TT_AttributeSquare; + CurrentToken->setType(TT_AttributeSquare); + if (IsCSharpAttributeSpecifier) + CurrentToken->setType(TT_AttributeSquare); else if (((CurrentToken->Next && CurrentToken->Next->is(tok::l_paren)) || (CurrentToken->Previous && @@ -577,26 +617,26 @@ private: // will be expanded to more tokens. // FIXME: Do we incorrectly label ":" with this? 
StartsObjCMethodExpr = false; - Left->Type = TT_Unknown; + Left->setType(TT_Unknown); } if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { - CurrentToken->Type = TT_ObjCMethodExpr; + CurrentToken->setType(TT_ObjCMethodExpr); // If we haven't seen a colon yet, make sure the last identifier // before the r_square is tagged as a selector name component. if (!ColonFound && CurrentToken->Previous && CurrentToken->Previous->is(TT_Unknown) && canBeObjCSelectorComponent(*CurrentToken->Previous)) - CurrentToken->Previous->Type = TT_SelectorName; + CurrentToken->Previous->setType(TT_SelectorName); // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. if (Parent && Parent->is(TT_PointerOrReference)) - Parent->Type = TT_BinaryOperator; + Parent->setType(TT_BinaryOperator); } // An arrow after an ObjC method expression is not a lambda arrow. - if (CurrentToken->Type == TT_ObjCMethodExpr && CurrentToken->Next && - CurrentToken->Next->is(TT_LambdaArrow)) - CurrentToken->Next->Type = TT_Unknown; + if (CurrentToken->getType() == TT_ObjCMethodExpr && + CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) + CurrentToken->Next->setType(TT_Unknown); Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // FirstObjCSelectorName is set when a colon is found. This does @@ -630,21 +670,21 @@ private: tok::kw_using)) { // Remember that this is a [[using ns: foo]] C++ attribute, so we // don't add a space before the colon (unlike other colons). 
- CurrentToken->Type = TT_AttributeColon; + CurrentToken->setType(TT_AttributeColon); } else if (Left->isOneOf(TT_ArraySubscriptLSquare, TT_DesignatedInitializerLSquare)) { - Left->Type = TT_ObjCMethodExpr; + Left->setType(TT_ObjCMethodExpr); StartsObjCMethodExpr = true; Contexts.back().ColonIsObjCMethodExpr = true; if (Parent && Parent->is(tok::r_paren)) // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. - Parent->Type = TT_CastRParen; + Parent->setType(TT_CastRParen); } ColonFound = true; } if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) && !ColonFound) - Left->Type = TT_ArrayInitializerLSquare; + Left->setType(TT_ArrayInitializerLSquare); FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; @@ -659,7 +699,7 @@ private: Left->ParentBracket = Contexts.back().ContextKind; if (Contexts.back().CaretFound) - Left->Type = TT_ObjCBlockLBrace; + Left->setType(TT_ObjCBlockLBrace); Contexts.back().CaretFound = false; ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); @@ -688,18 +728,18 @@ private: (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { - Left->Type = TT_DictLiteral; + Left->setType(TT_DictLiteral); if (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal)) - Previous->Type = TT_SelectorName; + Previous->setType(TT_SelectorName); } if (CurrentToken->is(tok::colon) || Style.Language == FormatStyle::LK_JavaScript) - Left->Type = TT_DictLiteral; + Left->setType(TT_DictLiteral); } if (CurrentToken->is(tok::comma) && Style.Language == FormatStyle::LK_JavaScript) - Left->Type = TT_DictLiteral; + Left->setType(TT_DictLiteral); if (!consumeToken()) return false; } @@ -726,7 +766,7 @@ private: bool parseConditional() { while (CurrentToken) { if (CurrentToken->is(tok::colon)) { - CurrentToken->Type = TT_ConditionalExpr; + CurrentToken->setType(TT_ConditionalExpr); next(); return true; } @@ -738,7 
+778,7 @@ private: bool parseTemplateDeclaration() { if (CurrentToken && CurrentToken->is(tok::less)) { - CurrentToken->Type = TT_TemplateOpener; + CurrentToken->setType(TT_TemplateOpener); next(); if (!parseAngle()) return false; @@ -756,7 +796,7 @@ private: case tok::plus: case tok::minus: if (!Tok->Previous && Line.MustBeDeclaration) - Tok->Type = TT_ObjCMethodSpecifier; + Tok->setType(TT_ObjCMethodSpecifier); break; case tok::colon: if (!Tok->Previous) @@ -773,21 +813,30 @@ private: (Contexts.size() == 1 && Line.MustBeDeclaration)) { // method/property declaration Contexts.back().IsExpression = false; - Tok->Type = TT_JsTypeColon; + Tok->setType(TT_JsTypeColon); + break; + } + } else if (Style.isCSharp()) { + if (Contexts.back().InCSharpAttributeSpecifier) { + Tok->setType(TT_AttributeColon); + break; + } + if (Contexts.back().ContextKind == tok::l_paren) { + Tok->setType(TT_CSharpNamedArgumentColon); break; } } if (Contexts.back().ColonIsDictLiteral || Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { - Tok->Type = TT_DictLiteral; + Tok->setType(TT_DictLiteral); if (Style.Language == FormatStyle::LK_TextProto) { if (FormatToken *Previous = Tok->getPreviousNonComment()) - Previous->Type = TT_SelectorName; + Previous->setType(TT_SelectorName); } } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { - Tok->Type = TT_ObjCMethodExpr; + Tok->setType(TT_ObjCMethodExpr); const FormatToken *BeforePrevious = Tok->Previous->Previous; // Ensure we tag all identifiers in method declarations as // TT_SelectorName. 
@@ -802,7 +851,7 @@ private: BeforePrevious->is(tok::r_square) || Contexts.back().LongestObjCSelectorName == 0 || UnknownIdentifierInMethodDeclaration) { - Tok->Previous->Type = TT_SelectorName; + Tok->Previous->setType(TT_SelectorName); if (!Contexts.back().FirstObjCSelectorName) Contexts.back().FirstObjCSelectorName = Tok->Previous; else if (Tok->Previous->ColumnWidth > @@ -814,25 +863,30 @@ private: ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; } } else if (Contexts.back().ColonIsForRangeExpr) { - Tok->Type = TT_RangeBasedForLoopColon; + Tok->setType(TT_RangeBasedForLoopColon); } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { - Tok->Type = TT_BitFieldColon; + Tok->setType(TT_BitFieldColon); } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { - if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, - tok::kw_noexcept)) - Tok->Type = TT_CtorInitializerColon; - else - Tok->Type = TT_InheritanceColon; + FormatToken *Prev = Tok->getPreviousNonComment(); + if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) + Tok->setType(TT_CtorInitializerColon); + else if (Prev->is(tok::kw_try)) { + // Member initializer list within function try block. + FormatToken *PrevPrev = Prev->getPreviousNonComment(); + if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept)) + Tok->setType(TT_CtorInitializerColon); + } else + Tok->setType(TT_InheritanceColon); } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && (Tok->Next->isOneOf(tok::r_paren, tok::comma) || (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && Tok->Next->Next->is(tok::colon)))) { // This handles a special macro in ObjC code where selectors including // the colon are passed as macro arguments. 
- Tok->Type = TT_ObjCMethodExpr; + Tok->setType(TT_ObjCMethodExpr); } else if (Contexts.back().ContextKind == tok::l_paren) { - Tok->Type = TT_InlineASMColon; + Tok->setType(TT_InlineASMColon); } break; case tok::pipe: @@ -841,7 +895,7 @@ private: // intersection types, respectively. if (Style.Language == FormatStyle::LK_JavaScript && !Contexts.back().IsExpression) - Tok->Type = TT_JsTypeOperator; + Tok->setType(TT_JsTypeOperator); break; case tok::kw_if: case tok::kw_while: @@ -877,9 +931,9 @@ private: if (Tok->Previous && Tok->Previous->is(tok::r_paren) && Tok->Previous->MatchingParen && Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { - Tok->Previous->Type = TT_OverloadedOperator; - Tok->Previous->MatchingParen->Type = TT_OverloadedOperator; - Tok->Type = TT_OverloadedOperatorLParen; + Tok->Previous->setType(TT_OverloadedOperator); + Tok->Previous->MatchingParen->setType(TT_OverloadedOperator); + Tok->setType(TT_OverloadedOperatorLParen); } if (!parseParens()) @@ -898,15 +952,15 @@ private: case tok::l_brace: if (Style.Language == FormatStyle::LK_TextProto) { FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->Type != TT_DictLiteral) - Previous->Type = TT_SelectorName; + if (Previous && Previous->getType() != TT_DictLiteral) + Previous->setType(TT_SelectorName); } if (!parseBrace()) return false; break; case tok::less: if (parseAngle()) { - Tok->Type = TT_TemplateOpener; + Tok->setType(TT_TemplateOpener); // In TT_Proto, we must distignuish between: // map<key, value> // msg < item: data > @@ -915,13 +969,13 @@ private: if (Style.Language == FormatStyle::LK_TextProto || (Style.Language == FormatStyle::LK_Proto && Tok->Previous && Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { - Tok->Type = TT_DictLiteral; + Tok->setType(TT_DictLiteral); FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->Type != TT_DictLiteral) - Previous->Type = TT_SelectorName; + if (Previous && 
Previous->getType() != TT_DictLiteral) + Previous->setType(TT_SelectorName); } } else { - Tok->Type = TT_BinaryOperator; + Tok->setType(TT_BinaryOperator); NonTemplateLess.insert(Tok); CurrentToken = Tok; next(); @@ -937,7 +991,7 @@ private: break; case tok::greater: if (Style.Language != FormatStyle::LK_TextProto) - Tok->Type = TT_BinaryOperator; + Tok->setType(TT_BinaryOperator); if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser)) Tok->SpacesRequiredBefore = 1; break; @@ -948,20 +1002,29 @@ private: while (CurrentToken && !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) - CurrentToken->Type = TT_PointerOrReference; + CurrentToken->setType(TT_PointerOrReference); consumeToken(); + if (CurrentToken && CurrentToken->is(tok::comma) && + CurrentToken->Previous->isNot(tok::kw_operator)) + break; if (CurrentToken && CurrentToken->Previous->isOneOf( TT_BinaryOperator, TT_UnaryOperator, tok::comma, tok::star, tok::arrow, tok::amp, tok::ampamp)) - CurrentToken->Previous->Type = TT_OverloadedOperator; - } - if (CurrentToken) { - CurrentToken->Type = TT_OverloadedOperatorLParen; - if (CurrentToken->Previous->is(TT_BinaryOperator)) - CurrentToken->Previous->Type = TT_OverloadedOperator; + CurrentToken->Previous->setType(TT_OverloadedOperator); } + if (CurrentToken && CurrentToken->is(tok::l_paren)) + CurrentToken->setType(TT_OverloadedOperatorLParen); + if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator)) + CurrentToken->Previous->setType(TT_OverloadedOperator); break; case tok::question: + if (Tok->is(TT_CSharpNullConditionalLSquare)) { + if (!parseSquare()) + return false; + break; + } + if (Tok->isOneOf(TT_CSharpNullConditional, TT_CSharpNullCoalescing)) + break; if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren, tok::r_brace)) { @@ -969,7 +1032,7 @@ private: // types (fields, parameters), e.g. 
// function(x?: string, y?) {...} // class X { y?; } - Tok->Type = TT_JsTypeOptionalQuestion; + Tok->setType(TT_JsTypeOptionalQuestion); break; } // Declarations cannot be conditional expressions, this can only be part @@ -977,6 +1040,18 @@ private: if (Line.MustBeDeclaration && !Contexts.back().IsExpression && Style.Language == FormatStyle::LK_JavaScript) break; + if (Style.isCSharp()) { + // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be + // nullable types. + // Line.MustBeDeclaration will be true for `Type? name;`. + if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) || + (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) || + (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next && + Tok->Next->Next->is(tok::equal))) { + Tok->setType(TT_CSharpNullable); + break; + } + } parseConditional(); break; case tok::kw_template: @@ -984,9 +1059,9 @@ private: break; case tok::comma: if (Contexts.back().InCtorInitializer) - Tok->Type = TT_CtorInitializerComma; + Tok->setType(TT_CtorInitializerComma); else if (Contexts.back().InInheritanceList) - Tok->Type = TT_InheritanceComma; + Tok->setType(TT_InheritanceComma); else if (Contexts.back().FirstStartOfName && (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; @@ -1000,6 +1075,11 @@ private: Keywords.kw___has_include_next)) { parseHasInclude(); } + if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && + Tok->Next->isNot(tok::l_paren)) { + Tok->setType(TT_CSharpGenericTypeConstraint); + parseCSharpGenericTypeConstraint(); + } break; default: break; @@ -1007,6 +1087,35 @@ private: return true; } + void parseCSharpGenericTypeConstraint() { + int OpenAngleBracketsCount = 0; + while (CurrentToken) { + if (CurrentToken->is(tok::less)) { + // parseAngle is too greedy and will consume the whole line. 
+ CurrentToken->setType(TT_TemplateOpener); + ++OpenAngleBracketsCount; + next(); + } else if (CurrentToken->is(tok::greater)) { + CurrentToken->setType(TT_TemplateCloser); + --OpenAngleBracketsCount; + next(); + } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) { + // We allow line breaks after GenericTypeConstraintComma's + // so do not flag commas in Generics as GenericTypeConstraintComma's. + CurrentToken->setType(TT_CSharpGenericTypeConstraintComma); + next(); + } else if (CurrentToken->is(Keywords.kw_where)) { + CurrentToken->setType(TT_CSharpGenericTypeConstraint); + next(); + } else if (CurrentToken->is(tok::colon)) { + CurrentToken->setType(TT_CSharpGenericTypeConstraintColon); + next(); + } else { + next(); + } + } + } + void parseIncludeDirective() { if (CurrentToken && CurrentToken->is(tok::less)) { next(); @@ -1015,7 +1124,7 @@ private: // literals. if (CurrentToken->isNot(tok::comment) && !CurrentToken->TokenText.startswith("//")) - CurrentToken->Type = TT_ImplicitStringLiteral; + CurrentToken->setType(TT_ImplicitStringLiteral); next(); } } @@ -1027,7 +1136,7 @@ private: // warning or error. next(); while (CurrentToken) { - CurrentToken->Type = TT_ImplicitStringLiteral; + CurrentToken->setType(TT_ImplicitStringLiteral); next(); } } @@ -1041,7 +1150,7 @@ private: next(); // Consume first token (so we fix leading whitespace). while (CurrentToken) { if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) - CurrentToken->Type = TT_ImplicitStringLiteral; + CurrentToken->setType(TT_ImplicitStringLiteral); next(); } } @@ -1068,7 +1177,7 @@ private: // Treat these like C++ #include directives. while (CurrentToken) { // Tokens cannot be comments here. 
- CurrentToken->Type = TT_ImplicitStringLiteral; + CurrentToken->setType(TT_ImplicitStringLiteral); next(); } return LT_ImportStatement; @@ -1228,8 +1337,8 @@ private: TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString, - TT_ObjCStringLiteral)) - CurrentToken->Type = TT_Unknown; + TT_ObjCStringLiteral, TT_UntouchableMacroFunc)) + CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; CurrentToken->FakeLParens.clear(); @@ -1317,7 +1426,7 @@ private: if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && Previous->Previous && Previous->Previous->isNot(tok::equal)) - Previous->Type = TT_PointerOrReference; + Previous->setType(TT_PointerOrReference); } } } else if (Current.is(tok::lessless) && @@ -1339,7 +1448,7 @@ private: for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); Previous = Previous->Previous) - Previous->Type = TT_PointerOrReference; + Previous->setType(TT_PointerOrReference); if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer) Contexts.back().IsExpression = false; } else if (Current.is(tok::kw_new)) { @@ -1423,19 +1532,36 @@ private: // The token type is already known. 
return; + if (Style.isCSharp() && CurrentToken->is(tok::question)) { + if (CurrentToken->TokenText == "??") { + Current.setType(TT_CSharpNullCoalescing); + return; + } + if (CurrentToken->TokenText == "?.") { + Current.setType(TT_CSharpNullConditional); + return; + } + if (CurrentToken->TokenText == "?[") { + Current.setType(TT_CSharpNullConditionalLSquare); + return; + } + } + if (Style.Language == FormatStyle::LK_JavaScript) { if (Current.is(tok::exclaim)) { if (Current.Previous && - (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace, - tok::r_paren, tok::r_square, - tok::r_brace) || + (Keywords.IsJavaScriptIdentifier( + *Current.Previous, /* AcceptIdentifierName= */ true) || + Current.Previous->isOneOf( + tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, + Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) || Current.Previous->Tok.isLiteral())) { - Current.Type = TT_JsNonNullAssertion; + Current.setType(TT_JsNonNullAssertion); return; } if (Current.Next && Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { - Current.Type = TT_JsNonNullAssertion; + Current.setType(TT_JsNonNullAssertion); return; } } @@ -1445,11 +1571,11 @@ private: // function declaration have been found. In this case, 'Current' is a // trailing token of this declaration and thus cannot be a name. 
if (Current.is(Keywords.kw_instanceof)) { - Current.Type = TT_BinaryOperator; + Current.setType(TT_BinaryOperator); } else if (isStartOfName(Current) && (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { Contexts.back().FirstStartOfName = &Current; - Current.Type = TT_StartOfName; + Current.setType(TT_StartOfName); } else if (Current.is(tok::semi)) { // Reset FirstStartOfName after finding a semicolon so that a for loop // with multiple increment statements is not confused with a for loop @@ -1459,57 +1585,57 @@ private: AutoFound = true; } else if (Current.is(tok::arrow) && Style.Language == FormatStyle::LK_Java) { - Current.Type = TT_LambdaArrow; + Current.setType(TT_LambdaArrow); } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && Current.NestingLevel == 0 && !Current.Previous->is(tok::kw_operator)) { // not auto operator->() -> xxx; - Current.Type = TT_TrailingReturnArrow; + Current.setType(TT_TrailingReturnArrow); } else if (isDeductionGuide(Current)) { // Deduction guides trailing arrow " A(...) -> A<T>;". 
- Current.Type = TT_TrailingReturnArrow; + Current.setType(TT_TrailingReturnArrow); } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { - Current.Type = determineStarAmpUsage(Current, - Contexts.back().CanBeExpression && - Contexts.back().IsExpression, - Contexts.back().InTemplateArgument); + Current.setType(determineStarAmpUsage( + Current, + Contexts.back().CanBeExpression && Contexts.back().IsExpression, + Contexts.back().InTemplateArgument)); } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { - Current.Type = determinePlusMinusCaretUsage(Current); + Current.setType(determinePlusMinusCaretUsage(Current)); if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) Contexts.back().CaretFound = true; } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { - Current.Type = determineIncrementUsage(Current); + Current.setType(determineIncrementUsage(Current)); } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { - Current.Type = TT_UnaryOperator; + Current.setType(TT_UnaryOperator); } else if (Current.is(tok::question)) { if (Style.Language == FormatStyle::LK_JavaScript && Line.MustBeDeclaration && !Contexts.back().IsExpression) { // In JavaScript, `interface X { foo?(): bar; }` is an optional method // on the interface, not a ternary expression. - Current.Type = TT_JsTypeOptionalQuestion; + Current.setType(TT_JsTypeOptionalQuestion); } else { - Current.Type = TT_ConditionalExpr; + Current.setType(TT_ConditionalExpr); } } else if (Current.isBinaryOperator() && (!Current.Previous || Current.Previous->isNot(tok::l_square)) && (!Current.is(tok::greater) && Style.Language != FormatStyle::LK_TextProto)) { - Current.Type = TT_BinaryOperator; + Current.setType(TT_BinaryOperator); } else if (Current.is(tok::comment)) { if (Current.TokenText.startswith("/*")) { if (Current.TokenText.endswith("*/")) - Current.Type = TT_BlockComment; + Current.setType(TT_BlockComment); else // The lexer has for some reason determined a comment here. 
But we // cannot really handle it, if it isn't properly terminated. Current.Tok.setKind(tok::unknown); } else { - Current.Type = TT_LineComment; + Current.setType(TT_LineComment); } } else if (Current.is(tok::r_paren)) { if (rParenEndsCast(Current)) - Current.Type = TT_CastRParen; + Current.setType(TT_CastRParen); if (Current.MatchingParen && Current.Next && !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, @@ -1524,7 +1650,7 @@ private: BeforeParen->TokenText == BeforeParen->TokenText.upper() && (!BeforeParen->Previous || BeforeParen->Previous->ClosesTemplateDeclaration)) - Current.Type = TT_FunctionAnnotationRParen; + Current.setType(TT_FunctionAnnotationRParen); } } } else if (Current.is(tok::at) && Current.Next && @@ -1536,10 +1662,10 @@ private: case tok::objc_interface: case tok::objc_implementation: case tok::objc_protocol: - Current.Type = TT_ObjCDecl; + Current.setType(TT_ObjCDecl); break; case tok::objc_property: - Current.Type = TT_ObjCProperty; + Current.setType(TT_ObjCProperty); break; default: break; @@ -1548,11 +1674,11 @@ private: FormatToken *PreviousNoComment = Current.getPreviousNonComment(); if (PreviousNoComment && PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) - Current.Type = TT_DesignatedInitializerPeriod; + Current.setType(TT_DesignatedInitializerPeriod); else if (Style.Language == FormatStyle::LK_Java && Current.Previous && Current.Previous->isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) { - Current.Type = Current.Previous->Type; + Current.setType(Current.Previous->getType()); } } else if (canBeObjCSelectorComponent(Current) && // FIXME(bug 36976): ObjC return types shouldn't use @@ -1565,7 +1691,7 @@ private: // This is the first part of an Objective-C selector name. (If there's no // colon after this, this is the only place which annotates the identifier // as a selector.) 
- Current.Type = TT_SelectorName; + Current.setType(TT_SelectorName); } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept) && Current.Previous && @@ -1573,7 +1699,7 @@ private: Line.MightBeFunctionDecl && Contexts.size() == 1) { // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. - Current.Type = TT_TrailingAnnotation; + Current.setType(TT_TrailingAnnotation); } else if ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && Current.Previous) { @@ -1582,13 +1708,13 @@ private: const FormatToken &AtToken = *Current.Previous; const FormatToken *Previous = AtToken.getPreviousNonComment(); if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) - Current.Type = TT_LeadingJavaAnnotation; + Current.setType(TT_LeadingJavaAnnotation); else - Current.Type = TT_JavaAnnotation; + Current.setType(TT_JavaAnnotation); } else if (Current.Previous->is(tok::period) && Current.Previous->isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) { - Current.Type = Current.Previous->Type; + Current.setType(Current.Previous->getType()); } } } @@ -1640,8 +1766,9 @@ private: /// Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { - // C-style casts are only used in C++ and Java. - if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) + // C-style casts are only used in C++, C# and Java. + if (!Style.isCSharp() && !Style.isCpp() && + Style.Language != FormatStyle::LK_Java) return false; // Empty parens aren't casts and there are no casts at the end of the line. @@ -1676,6 +1803,10 @@ private: if (Tok.Next->is(tok::question)) return false; + // `foreach((A a, B b) in someList)` should not be seen as a cast. + if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp()) + return false; + // Functions which end with decorations like volatile, noexcept are unlikely // to be casts. 
if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const, @@ -1749,6 +1880,10 @@ private: if (Style.Language == FormatStyle::LK_JavaScript) return TT_BinaryOperator; + // && in C# must be a binary operator. + if (Style.isCSharp() && Tok.is(tok::ampamp)) + return TT_BinaryOperator; + const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator; @@ -1800,14 +1935,16 @@ private: return TT_BinaryOperator; // "&&(" is quite unlikely to be two successive unary "&". - if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) + if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren)) return TT_BinaryOperator; // This catches some cases where evaluation order is used as control flow: // aaa && aaa->f(); - const FormatToken *NextNextToken = NextToken->getNextNonComment(); - if (NextNextToken && NextNextToken->is(tok::arrow)) - return TT_BinaryOperator; + if (NextToken->Tok.isAnyIdentifier()) { + const FormatToken *NextNextToken = NextToken->getNextNonComment(); + if (NextNextToken && NextNextToken->is(tok::arrow)) + return TT_BinaryOperator; + } // It is very unlikely that we are going to find a pointer or reference type // definition on the RHS of an assignment. 
@@ -2176,6 +2313,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current, Next = Next->Next; continue; } + if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { + Next = Next->MatchingParen; + continue; + } break; } @@ -2277,7 +2418,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { if (isFunctionDeclarationName(*Current, Line)) - Current->Type = TT_FunctionDeclarationName; + Current->setType(TT_FunctionDeclarationName); if (Current->is(TT_LineComment)) { if (Current->Previous->BlockKind == BK_BracedInit && Current->Previous->opensScope()) @@ -2596,7 +2737,7 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { /// otherwise. static bool isKeywordWithCondition(const FormatToken &Tok) { return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_constexpr); + tok::kw_constexpr, tok::kw_catch); } bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, @@ -2703,15 +2844,48 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon, tok::l_square)); + // Ensure right pointer alignement with ellipsis e.g. int *...P + if (Left.is(tok::ellipsis) && Left.Previous && + Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) + return Style.PointerAlignment != FormatStyle::PAS_Right; + if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; - if (Right.isOneOf(tok::star, tok::amp, tok::ampamp) && - (Left.is(tok::identifier) || Left.isSimpleTypeSpecifier()) && - Left.Previous && Left.Previous->is(tok::kw_operator)) - // Space between the type and the * - // operator void*(), operator char*(), operator Foo*() dependant - // on PointerAlignment style. 
- return (Style.PointerAlignment != FormatStyle::PAS_Left); + if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp)) + return false; + if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) { + const FormatToken *Previous = &Left; + while (Previous && !Previous->is(tok::kw_operator)) { + if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) { + Previous = Previous->getPreviousNonComment(); + continue; + } + if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) { + Previous = Previous->MatchingParen->getPreviousNonComment(); + continue; + } + if (Previous->is(tok::coloncolon)) { + Previous = Previous->getPreviousNonComment(); + continue; + } + break; + } + // Space between the type and the * in: + // operator void*() + // operator char*() + // operator /*comment*/ const char*() + // operator volatile /*comment*/ char*() + // operator Foo*() + // operator C<T>*() + // operator std::Foo*() + // operator C<T>::D<U>*() + // dependent on PointerAlignment style. + if (Previous && + (Previous->endsSequence(tok::kw_operator) || + Previous->endsSequence(tok::kw_const, tok::kw_operator) || + Previous->endsSequence(tok::kw_volatile, tok::kw_operator))) + return (Style.PointerAlignment != FormatStyle::PAS_Left); + } const auto SpaceRequiredForArrayInitializerLSquare = [](const FormatToken &LSquareTok, const FormatStyle &Style) { return Style.SpacesInContainerLiterals || @@ -2755,10 +2929,19 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // No whitespace in x(/*foo=*/1), except for JavaScript. return Style.Language == FormatStyle::LK_JavaScript || !Left.TokenText.endswith("=*/"); + + // Space between template and attribute. + // e.g. template <typename T> [[nodiscard]] ... 
+ if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare)) + return true; if (Right.is(tok::l_paren)) { if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) return true; + if (Style.SpaceBeforeParens == + FormatStyle::SBPO_ControlStatementsExceptForEachMacros && + Left.is(TT_ForEachMacro)) + return false; return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, @@ -2807,7 +2990,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) // Objective-C dictionary literal -> no space before closing brace. return false; - if (Right.Type == TT_TrailingAnnotation && + if (Right.getType() == TT_TrailingAnnotation && Right.isOneOf(tok::amp, tok::ampamp) && Left.isOneOf(tok::kw_const, tok::kw_volatile) && (!Right.Next || Right.Next->is(tok::semi))) @@ -2855,13 +3038,83 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin(); } else if (Style.isCSharp()) { + // Require spaces around '{' and before '}' unless they appear in + // interpolated strings. Interpolated strings are merged into a single token + // so cannot have spaces inserted by this function. + + // No space between 'this' and '[' + if (Left.is(tok::kw_this) && Right.is(tok::l_square)) + return false; + + // No space between 'new' and '(' + if (Left.is(tok::kw_new) && Right.is(tok::l_paren)) + return false; + + // Space before { (including space within '{ {'). + if (Right.is(tok::l_brace)) + return true; + + // Spaces inside braces. + if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace)) + return true; + + if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace)) + return true; + + // Spaces around '=>'. 
+ if (Left.is(TT_JsFatArrow) || Right.is(TT_JsFatArrow)) + return true; + + // No spaces around attribute target colons + if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon)) + return false; + // space between type and variable e.g. Dictionary<string,string> foo; if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName)) return true; + + // spaces inside square brackets. + if (Left.is(tok::l_square) || Right.is(tok::r_square)) + return Style.SpacesInSquareBrackets; + + // No space before ? in nullable types. + if (Right.is(TT_CSharpNullable)) + return false; + + // Require space after ? in nullable types except in generics and casts. + if (Left.is(TT_CSharpNullable)) + return !Right.isOneOf(TT_TemplateCloser, tok::r_paren); + + // No space before or after '?.'. + if (Left.is(TT_CSharpNullConditional) || Right.is(TT_CSharpNullConditional)) + return false; + + // Space before and after '??'. + if (Left.is(TT_CSharpNullCoalescing) || Right.is(TT_CSharpNullCoalescing)) + return true; + + // No space before '?['. + if (Right.is(TT_CSharpNullConditionalLSquare)) + return false; + + // No space between consecutive commas '[,,]'. + if (Left.is(tok::comma) && Right.is(tok::comma)) + return false; + + // Possible space inside `?[ 0 ]`. + if (Left.is(TT_CSharpNullConditionalLSquare)) + return Style.SpacesInSquareBrackets; + + // space after var in `var (key, value)` + if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren)) + return true; + // space between keywords and paren e.g. 
"using (" if (Right.is(tok::l_paren)) - if (Left.is(tok::kw_using)) - return spaceRequiredBeforeParens(Left); + if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when, + Keywords.kw_lock)) + return Style.SpaceBeforeParens == FormatStyle::SBPO_ControlStatements || + spaceRequiredBeforeParens(Right); } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; @@ -2881,9 +3134,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) return false; // In tagged template literals ("html`bar baz`"), there is no space between - // the tag identifier and the template string. getIdentifierInfo makes sure - // that the identifier is not a pseudo keyword like `yield`, either. - if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) && + // the tag identifier and the template string. + if (Keywords.IsJavaScriptIdentifier(Left, + /* AcceptIdentifierName= */ false) && Right.is(TT_TemplateString)) return false; if (Right.is(tok::star) && @@ -3012,6 +3265,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Style.SpacesInContainerLiterals; if (Right.is(TT_AttributeColon)) return false; + if (Right.is(TT_CSharpNamedArgumentColon)) + return false; return true; } if (Left.is(TT_UnaryOperator)) { @@ -3062,12 +3317,13 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) + // Put a space between < and :: in vector< ::std::string > return (Left.is(TT_TemplateOpener) && - Style.Standard < FormatStyle::LS_Cpp11) || + (Style.Standard < FormatStyle::LS_Cpp11 || Style.SpacesInAngles)) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, - tok::kw___super, TT_TemplateCloser, - TT_TemplateOpener)) || - (Left.is(tok ::l_paren) && 
Style.SpacesInParentheses); + tok::kw___super, TT_TemplateOpener, + TT_TemplateCloser)) || + (Left.is(tok::l_paren) && Style.SpacesInParentheses); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; // Space before TT_StructuredBindingLSquare. @@ -3104,13 +3360,67 @@ static bool isAllmanBrace(const FormatToken &Tok) { !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral); } +// Returns 'true' if 'Tok' is an function argument. +static bool IsFunctionArgument(const FormatToken &Tok) { + return Tok.MatchingParen && Tok.MatchingParen->Next && + Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren); +} + +static bool +isItAnEmptyLambdaAllowed(const FormatToken &Tok, + FormatStyle::ShortLambdaStyle ShortLambdaOption) { + return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None; +} + +static bool +isItAInlineLambdaAllowed(const FormatToken &Tok, + FormatStyle::ShortLambdaStyle ShortLambdaOption) { + return (ShortLambdaOption == FormatStyle::SLS_Inline && + IsFunctionArgument(Tok)) || + (ShortLambdaOption == FormatStyle::SLS_All); +} + +static bool isOneChildWithoutMustBreakBefore(const FormatToken &Tok) { + if (Tok.Children.size() != 1) + return false; + FormatToken *curElt = Tok.Children[0]->First; + while (curElt) { + if (curElt->MustBreakBefore) + return false; + curElt = curElt->Next; + } + return true; +} +static bool isAllmanLambdaBrace(const FormatToken &Tok) { + return (Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && + !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral)); +} + +static bool isAllmanBraceIncludedBreakableLambda( + const FormatToken &Tok, FormatStyle::ShortLambdaStyle ShortLambdaOption) { + if (!isAllmanLambdaBrace(Tok)) + return false; + + if (isItAnEmptyLambdaAllowed(Tok, ShortLambdaOption)) + return false; + + return !isItAInlineLambdaAllowed(Tok, ShortLambdaOption) || + !isOneChildWithoutMustBreakBefore(Tok); +} + bool TokenAnnotator::mustBreakBefore(const 
AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) return true; - if (Style.Language == FormatStyle::LK_JavaScript) { + if (Style.isCSharp()) { + if (Right.is(TT_CSharpNamedArgumentColon) || + Left.is(TT_CSharpNamedArgumentColon)) + return false; + if (Right.is(TT_CSharpGenericTypeConstraint)) + return true; + } else if (Style.Language == FormatStyle::LK_JavaScript) { // FIXME: This might apply to other languages and token kinds. if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && Left.Previous->is(tok::string_literal)) @@ -3133,6 +3443,25 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, // JavaScript top-level enum key/value pairs are put on separate lines // instead of bin-packing. return true; + if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous && + Left.Previous->is(TT_JsFatArrow)) { + // JS arrow function (=> {...}). + switch (Style.AllowShortLambdasOnASingleLine) { + case FormatStyle::SLS_All: + return false; + case FormatStyle::SLS_None: + return true; + case FormatStyle::SLS_Empty: + return !Left.Children.empty(); + case FormatStyle::SLS_Inline: + // allow one-lining inline (e.g. in function call args) and empty arrow + // functions. + return (Left.NestingLevel == 0 && Line.Level == 0) && + !Left.Children.empty(); + } + llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum"); + } + if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && !Left.Children.empty()) // Support AllowShortFunctionsOnASingleLine for JavaScript. 
@@ -3220,6 +3549,14 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, } if (Right.is(TT_InlineASMBrace)) return Right.HasUnescapedNewline; + + auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; + if (Style.BraceWrapping.BeforeLambdaBody && + (isAllmanBraceIncludedBreakableLambda(Left, ShortLambdaOption) || + isAllmanBraceIncludedBreakableLambda(Right, ShortLambdaOption))) { + return true; + } + if (isAllmanBrace(Left) || isAllmanBrace(Right)) return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || (Line.startsWith(tok::kw_typedef, tok::kw_enum) && @@ -3231,8 +3568,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Left.is(TT_LambdaLBrace)) { - if (Left.MatchingParen && Left.MatchingParen->Next && - Left.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren) && + if (IsFunctionArgument(Left) && Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) return false; @@ -3243,13 +3579,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; } - // Put multiple C# attributes on a new line. - if (Style.isCSharp() && - ((Left.is(TT_AttributeSquare) && Left.is(tok::r_square)) || - (Left.is(tok::r_square) && Right.is(TT_AttributeSquare) && - Right.is(tok::l_square)))) - return true; - // Put multiple Java annotation on a new line. if ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && @@ -3376,9 +3705,15 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; - // Language-specific stuff. - if (Style.Language == FormatStyle::LK_Java) { + if (Style.isCSharp()) { + if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) || + Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) + return false; + // Only break after commas for generic type constraints. 
+ if (Line.First->is(TT_CSharpGenericTypeConstraint)) + return Left.is(TT_CSharpGenericTypeConstraintComma); + } else if (Style.Language == FormatStyle::LK_Java) { if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, Keywords.kw_implements)) return false; @@ -3592,7 +3927,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::kw___attribute) || (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) - return true; + return !Left.is(TT_AttributeSquare); if (Left.is(tok::identifier) && Right.is(tok::string_literal)) return true; @@ -3637,11 +3972,21 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) return false; + + auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; + if (Style.BraceWrapping.BeforeLambdaBody) { + if (isAllmanLambdaBrace(Left)) + return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption); + if (isAllmanLambdaBrace(Right)) + return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption); + } + return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct, tok::comment) || Right.isMemberAccess() || Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, tok::colon, tok::l_square, tok::at) || + (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || @@ -3654,9 +3999,9 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { while (Tok) { llvm::errs() << " M=" << Tok->MustBreakBefore << " C=" << Tok->CanBreakBefore - << " T=" << getTokenTypeName(Tok->Type) + << " T=" << getTokenTypeName(Tok->getType()) << " S=" << Tok->SpacesRequiredBefore - << " B=" << Tok->BlockParameterCount + << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount << " BK=" << 
Tok->BlockKind << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind << " FakeLParens="; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index fec85f1174da..22f27a668dcc 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -64,6 +64,8 @@ public: } if (static_cast<int>(Indent) + Offset >= 0) Indent += Offset; + if (Line.First->is(TT_CSharpGenericTypeConstraint)) + Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth; } /// Update the indent state given that \p Line indent should be @@ -340,21 +342,6 @@ private: ? 1 : 0; } - // Try to merge either empty or one-line block if is precedeed by control - // statement token - if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last && - I != AnnotatedLines.begin() && - I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { - unsigned MergedLines = 0; - if (Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never) { - MergedLines = tryMergeSimpleBlock(I - 1, E, Limit); - // If we managed to merge the block, discard the first merged line - // since we are merging starting from I. - if (MergedLines > 0) - --MergedLines; - } - return MergedLines; - } // Don't merge block with left brace wrapped after ObjC special blocks if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && I[-1]->First->is(tok::at) && I[-1]->First->Next) { @@ -404,7 +391,7 @@ private: ? tryMergeSimpleControlStatement(I, E, Limit) : 0; } - if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { + if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while, tok::kw_do)) { return Style.AllowShortLoopsOnASingleLine ? 
tryMergeSimpleControlStatement(I, E, Limit) : 0; @@ -449,7 +436,10 @@ private: return 0; Limit = limitConsideringMacros(I + 1, E, Limit); AnnotatedLine &Line = **I; - if (Line.Last->isNot(tok::r_paren)) + if (!Line.First->is(tok::kw_do) && Line.Last->isNot(tok::r_paren)) + return 0; + // Only merge do while if do is the only statement on the line. + if (Line.First->is(tok::kw_do) && !Line.Last->is(tok::kw_do)) return 0; if (1 + I[1]->Last->TotalLength > Limit) return 0; @@ -593,9 +583,10 @@ private: FormatToken *RecordTok = Line.First; // Skip record modifiers. while (RecordTok->Next && - RecordTok->isOneOf(tok::kw_typedef, tok::kw_export, - Keywords.kw_declare, Keywords.kw_abstract, - tok::kw_default)) + RecordTok->isOneOf( + tok::kw_typedef, tok::kw_export, Keywords.kw_declare, + Keywords.kw_abstract, tok::kw_default, tok::kw_public, + tok::kw_private, tok::kw_protected, Keywords.kw_internal)) RecordTok = RecordTok->Next; if (RecordTok && RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, @@ -817,7 +808,8 @@ protected: if (!DryRun) { Whitespaces->replaceWhitespace( *Child->First, /*Newlines=*/0, /*Spaces=*/1, - /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); + /*StartOfTokenColumn=*/State.Column, /*IsAligned=*/false, + State.Line->InPPDirective); } Penalty += formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); @@ -1238,7 +1230,8 @@ void UnwrappedLineFormatter::formatFirstToken( // If in Whitemsmiths mode, indent start and end of blocks if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { - if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case)) + if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case, + tok::kw_default)) Indent += Style.IndentWidth; } @@ -1249,6 +1242,7 @@ void UnwrappedLineFormatter::formatFirstToken( Indent = 0; Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, + /*IsAligned=*/false, Line.InPPDirective && !RootToken.HasUnescapedNewline); } diff --git 
a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index ead6b4743207..a37386425aae 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineParser.h" +#include "FormatToken.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -323,13 +324,54 @@ void UnwrappedLineParser::parseFile() { addUnwrappedLine(); } +void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_brace: + return; + default: + if (FormatTok->is(Keywords.kw_where)) { + addUnwrappedLine(); + nextToken(); + parseCSharpGenericTypeConstraint(); + break; + } + nextToken(); + break; + } + } while (!eof()); +} + +void UnwrappedLineParser::parseCSharpAttribute() { + int UnpairedSquareBrackets = 1; + do { + switch (FormatTok->Tok.getKind()) { + case tok::r_square: + nextToken(); + --UnpairedSquareBrackets; + if (UnpairedSquareBrackets == 0) { + addUnwrappedLine(); + return; + } + break; + case tok::l_square: + ++UnpairedSquareBrackets; + nextToken(); + break; + default: + nextToken(); + break; + } + } while (!eof()); +} + void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { bool SwitchLabelEncountered = false; do { tok::TokenKind kind = FormatTok->Tok.getKind(); - if (FormatTok->Type == TT_MacroBlockBegin) { + if (FormatTok->getType() == TT_MacroBlockBegin) { kind = tok::l_brace; - } else if (FormatTok->Type == TT_MacroBlockEnd) { + } else if (FormatTok->getType() == TT_MacroBlockEnd) { kind = tok::r_brace; } @@ -381,6 +423,13 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { SwitchLabelEncountered = true; parseStructuralElement(); break; + case tok::l_square: + if (Style.isCSharp()) { + nextToken(); + parseCSharpAttribute(); + break; + } + LLVM_FALLTHROUGH; default: 
parseStructuralElement(); break; @@ -851,14 +900,14 @@ void UnwrappedLineParser::parsePPUnknown() { addUnwrappedLine(); } -// Here we blacklist certain tokens that are not usually the first token in an +// Here we exclude certain tokens that are not usually the first token in an // unwrapped line. This is used in attempt to distinguish macro calls without // trailing semicolons from other constructs split to several lines. -static bool tokenCanStartNewLine(const clang::Token &Tok) { +static bool tokenCanStartNewLine(const FormatToken &Tok) { // Semicolon can be a null-statement, l_square can be a start of a macro or // a C++11 attribute, but this doesn't seem to be common. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && - Tok.isNot(tok::l_square) && + Tok.isNot(TT_AttributeSquare) && // Tokens that can only be used as binary operators and a part of // overloaded operator names. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && @@ -984,11 +1033,11 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { - FormatTok->Type = TT_InlineASMBrace; + FormatTok->setType(TT_InlineASMBrace); nextToken(); while (FormatTok && FormatTok->isNot(tok::eof)) { if (FormatTok->is(tok::r_brace)) { - FormatTok->Type = TT_InlineASMBrace; + FormatTok->setType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); break; @@ -1011,13 +1060,22 @@ void UnwrappedLineParser::parseStructuralElement() { parseAccessSpecifier(); return; case tok::kw_if: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; parseIfThenElse(); return; case tok::kw_for: case tok::kw_while: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; parseForOrWhileLoop(); return; case tok::kw_do: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. 
+ break; parseDoWhile(); return; case tok::kw_switch: @@ -1045,6 +1103,9 @@ void UnwrappedLineParser::parseStructuralElement() { return; case tok::kw_try: case tok::kw___try: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // field/method declaration. + break; parseTryCatch(); return; case tok::kw_extern: @@ -1052,11 +1113,16 @@ void UnwrappedLineParser::parseStructuralElement() { if (FormatTok->Tok.is(tok::string_literal)) { nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterExternBlock) { - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true); + if (!Style.IndentExternBlock) { + if (Style.BraceWrapping.AfterExternBlock) { + addUnwrappedLine(); + } + parseBlock(/*MustBeDeclaration=*/true, + /*AddLevel=*/Style.BraceWrapping.AfterExternBlock); } else { - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); + parseBlock(/*MustBeDeclaration=*/true, + /*AddLevel=*/Style.IndentExternBlock == + FormatStyle::IEBS_Indent); } addUnwrappedLine(); return; @@ -1274,14 +1340,14 @@ void UnwrappedLineParser::parseStructuralElement() { parseChildBlock(); break; case tok::l_brace: - if (!tryToParseBracedList()) { + if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { // A block outside of parentheses must be the last part of a // structural element. // FIXME: Figure out cases where this is not true, and add projections // for them (the one we know is missing are lambdas). if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); - FormatTok->Type = TT_FunctionLBrace; + FormatTok->setType(TT_FunctionLBrace); parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); return; @@ -1290,12 +1356,24 @@ void UnwrappedLineParser::parseStructuralElement() { // element continues. break; case tok::kw_try: + if (Style.Language == FormatStyle::LK_JavaScript && + Line->MustBeDeclaration) { + // field/method declaration. + nextToken(); + break; + } // We arrive here when parsing function-try blocks. 
if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); parseTryCatch(); return; case tok::identifier: { + if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && + Line->MustBeDeclaration) { + addUnwrappedLine(); + parseCSharpGenericTypeConstraint(); + break; + } if (FormatTok->is(TT_MacroBlockEnd)) { addUnwrappedLine(); return; @@ -1368,7 +1446,7 @@ void UnwrappedLineParser::parseStructuralElement() { : CommentsBeforeNextToken.front()->NewlinesBefore > 0; if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && - tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { + tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { addUnwrappedLine(); return; } @@ -1381,19 +1459,30 @@ void UnwrappedLineParser::parseStructuralElement() { // followed by a curly. if (FormatTok->is(TT_JsFatArrow)) { nextToken(); - if (FormatTok->is(tok::l_brace)) + if (FormatTok->is(tok::l_brace)) { + // C# may break after => if the next character is a newline. + if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { + // calling `addUnwrappedLine()` here causes odd parsing errors. + FormatTok->MustBreakBefore = true; + } parseChildBlock(); + } break; } nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { + // Block kind should probably be set to BK_BracedInit for any language. + // C# needs this change to ensure that array initialisers and object + // initialisers are indented the same way. 
+ if (Style.isCSharp()) + FormatTok->BlockKind = BK_BracedInit; nextToken(); parseBracedList(); } else if (Style.Language == FormatStyle::LK_Proto && FormatTok->Tok.is(tok::less)) { nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, /*ClosingBraceKind=*/tok::greater); } break; @@ -1410,6 +1499,96 @@ void UnwrappedLineParser::parseStructuralElement() { } while (!eof()); } +bool UnwrappedLineParser::tryToParsePropertyAccessor() { + assert(FormatTok->is(tok::l_brace)); + if (!Style.isCSharp()) + return false; + // See if it's a property accessor. + if (FormatTok->Previous->isNot(tok::identifier)) + return false; + + // See if we are inside a property accessor. + // + // Record the current tokenPosition so that we can advance and + // reset the current token. `Next` is not set yet so we need + // another way to advance along the token stream. + unsigned int StoredPosition = Tokens->getPosition(); + FormatToken *Tok = Tokens->getNextToken(); + + // A trivial property accessor is of the form: + // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } + // Track these as they do not require line breaks to be introduced. 
+ bool HasGetOrSet = false; + bool IsTrivialPropertyAccessor = true; + while (!eof()) { + if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, + tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, + Keywords.kw_set)) { + if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) + HasGetOrSet = true; + Tok = Tokens->getNextToken(); + continue; + } + if (Tok->isNot(tok::r_brace)) + IsTrivialPropertyAccessor = false; + break; + } + + if (!HasGetOrSet) { + Tokens->setPosition(StoredPosition); + return false; + } + + // Try to parse the property accessor: + // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties + Tokens->setPosition(StoredPosition); + if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true) + addUnwrappedLine(); + nextToken(); + do { + switch (FormatTok->Tok.getKind()) { + case tok::r_brace: + nextToken(); + if (FormatTok->is(tok::equal)) { + while (!eof() && FormatTok->isNot(tok::semi)) + nextToken(); + nextToken(); + } + addUnwrappedLine(); + return true; + case tok::l_brace: + ++Line->Level; + parseBlock(/*MustBeDeclaration=*/true); + addUnwrappedLine(); + --Line->Level; + break; + case tok::equal: + if (FormatTok->is(TT_JsFatArrow)) { + ++Line->Level; + do { + nextToken(); + } while (!eof() && FormatTok->isNot(tok::semi)); + nextToken(); + addUnwrappedLine(); + --Line->Level; + break; + } + nextToken(); + break; + default: + if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && + !IsTrivialPropertyAccessor) { + // Non-trivial get/set needs to be on its own line. + addUnwrappedLine(); + } + nextToken(); + } + } while (!eof()); + + // Unreachable for well-formed code (paired '{' and '}'). 
+ return true; +} + bool UnwrappedLineParser::tryToParseLambda() { if (!Style.isCpp()) { nextToken(); @@ -1480,6 +1659,7 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::lessequal: case tok::question: case tok::colon: + case tok::ellipsis: case tok::kw_true: case tok::kw_false: if (SeenArrow) { @@ -1491,7 +1671,7 @@ bool UnwrappedLineParser::tryToParseLambda() { // This might or might not actually be a lambda arrow (this could be an // ObjC method invocation followed by a dereferencing arrow). We might // reset this back to TT_Unknown in TokenAnnotator. - FormatTok->Type = TT_LambdaArrow; + FormatTok->setType(TT_LambdaArrow); SeenArrow = true; nextToken(); break; @@ -1499,8 +1679,8 @@ bool UnwrappedLineParser::tryToParseLambda() { return true; } } - FormatTok->Type = TT_LambdaLBrace; - LSquare.Type = TT_LambdaLSquare; + FormatTok->setType(TT_LambdaLBrace); + LSquare.setType(TT_LambdaLSquare); parseChildBlock(); return true; } @@ -1533,7 +1713,7 @@ void UnwrappedLineParser::tryToParseJSFunction() { // Consume * (generator function). Treat it like C++'s overloaded operators. if (FormatTok->is(tok::star)) { - FormatTok->Type = TT_OverloadedOperator; + FormatTok->setType(TT_OverloadedOperator); nextToken(); } @@ -1578,12 +1758,24 @@ bool UnwrappedLineParser::tryToParseBracedList() { } bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, + bool IsEnum, tok::TokenKind ClosingBraceKind) { bool HasError = false; // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. do { + if (Style.isCSharp()) { + if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + // Fat arrows can be followed by simple expressions or by child blocks + // in curly braces. 
+ if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + continue; + } + } + } if (Style.Language == FormatStyle::LK_JavaScript) { if (FormatTok->is(Keywords.kw_function) || FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { @@ -1607,6 +1799,8 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, } } if (FormatTok->Tok.getKind() == ClosingBraceKind) { + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); nextToken(); return !HasError; } @@ -1618,7 +1812,10 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, } break; case tok::l_square: - tryToParseLambda(); + if (Style.isCSharp()) + parseSquare(); + else + tryToParseLambda(); break; case tok::l_paren: parseParens(); @@ -1640,7 +1837,7 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, case tok::less: if (Style.Language == FormatStyle::LK_Proto) { nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, /*ClosingBraceKind=*/tok::greater); } else { nextToken(); @@ -1662,6 +1859,8 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, break; case tok::comma: nextToken(); + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); break; default: nextToken(); @@ -1768,6 +1967,9 @@ void UnwrappedLineParser::parseIfThenElse() { nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); + // handle [[likely]] / [[unlikely]] + if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) + parseSquare(); bool NeedsUnwrappedLine = false; if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); @@ -1784,6 +1986,9 @@ void UnwrappedLineParser::parseIfThenElse() { } if (FormatTok->Tok.is(tok::kw_else)) { nextToken(); + // handle [[likely]] / [[unlikely]] + if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute()) + parseSquare(); if 
(FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); @@ -1810,11 +2015,20 @@ void UnwrappedLineParser::parseTryCatch() { if (FormatTok->is(tok::colon)) { // We are in a function try block, what comes is an initializer list. nextToken(); + + // In case identifiers were removed by clang-tidy, what might follow is + // multiple commas in sequence - before the first identifier. + while (FormatTok->is(tok::comma)) + nextToken(); + while (FormatTok->is(tok::identifier)) { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); - if (FormatTok->is(tok::comma)) + + // In case identifiers were removed by clang-tidy, what might follow is + // multiple commas in sequence - after the first identifier. + while (FormatTok->is(tok::comma)) nextToken(); } } @@ -1898,7 +2112,7 @@ void UnwrappedLineParser::parseNamespace() { DeclarationScopeStack.size() > 1); parseBlock(/*MustBeDeclaration=*/true, AddLevel); // Munch the semicolon after a namespace. This is more common than one would - // think. Puttin the semicolon into its own line is very ugly. + // think. Putting the semicolon into its own line is very ugly. 
if (FormatTok->Tok.is(tok::semi)) nextToken(); addUnwrappedLine(); @@ -1909,6 +2123,19 @@ void UnwrappedLineParser::parseNamespace() { void UnwrappedLineParser::parseNew() { assert(FormatTok->is(tok::kw_new) && "'new' expected"); nextToken(); + + if (Style.isCSharp()) { + do { + if (FormatTok->is(tok::l_brace)) + parseBracedList(); + + if (FormatTok->isOneOf(tok::semi, tok::comma)) + return; + + nextToken(); + } while (!eof()); + } + if (Style.Language != FormatStyle::LK_Java) return; @@ -1959,7 +2186,7 @@ void UnwrappedLineParser::parseDoWhile() { if (FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false); - if (Style.BraceWrapping.IndentBraces) + if (Style.BraceWrapping.BeforeWhile) addUnwrappedLine(); } else { addUnwrappedLine(); @@ -1985,15 +2212,21 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { --Line->Level; if (LeftAlignLabel) Line->Level = 0; - if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { + if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && + FormatTok->Tok.is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Line->Level, Style.BraceWrapping.AfterCaseLabel, Style.BraceWrapping.IndentBraces); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == - FormatStyle::BWACS_Always) + FormatStyle::BWACS_Always) { addUnwrappedLine(); + if (!Style.IndentCaseBlocks && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { + Line->Level++; + } + } parseStructuralElement(); } addUnwrappedLine(); @@ -2097,9 +2330,18 @@ bool UnwrappedLineParser::parseEnum() { return true; } + if (!Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); // Parse enum body. 
nextToken(); - bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); + if (!Style.AllowShortEnumsOnASingleLine) { + addUnwrappedLine(); + Line->Level += 1; + } + bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, + /*IsEnum=*/true); + if (!Style.AllowShortEnumsOnASingleLine) + Line->Level -= 1; if (HasError) { if (FormatTok->is(tok::semi)) nextToken(); @@ -2112,6 +2354,51 @@ bool UnwrappedLineParser::parseEnum() { // "} n, m;" will end up in one unwrapped line. } +namespace { +// A class used to set and restore the Token position when peeking +// ahead in the token source. +class ScopedTokenPosition { + unsigned StoredPosition; + FormatTokenSource *Tokens; + +public: + ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { + assert(Tokens && "Tokens expected to not be null"); + StoredPosition = Tokens->getPosition(); + } + + ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } +}; +} // namespace + +// Look to see if we have [[ by looking ahead, if +// its not then rewind to the original position. +bool UnwrappedLineParser::tryToParseSimpleAttribute() { + ScopedTokenPosition AutoPosition(Tokens); + FormatToken *Tok = Tokens->getNextToken(); + // We already read the first [ check for the second. + if (Tok && !Tok->is(tok::l_square)) { + return false; + } + // Double check that the attribute is just something + // fairly simple. + while (Tok) { + if (Tok->is(tok::r_square)) { + break; + } + Tok = Tokens->getNextToken(); + } + Tok = Tokens->getNextToken(); + if (Tok && !Tok->is(tok::r_square)) { + return false; + } + Tok = Tokens->getNextToken(); + if (Tok && Tok->is(tok::semi)) { + return false; + } + return true; +} + void UnwrappedLineParser::parseJavaEnumBody() { // Determine whether the enum is simple, i.e. does not have a semicolon or // constants with class bodies. 
Simple enums can be formatted like braced @@ -2181,9 +2468,10 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. + // An [[attribute]] can be before the identifier. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, tok::kw___attribute, tok::kw___declspec, - tok::kw_alignas) || + tok::kw_alignas, tok::l_square, tok::r_square) || ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && FormatTok->isOneOf(tok::period, tok::comma))) { @@ -2203,8 +2491,16 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { FormatTok->TokenText != FormatTok->TokenText.upper(); nextToken(); // We can have macros or attributes in between 'class' and the class name. - if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) - parseParens(); + if (!IsNonMacroIdentifier) { + if (FormatTok->Tok.is(tok::l_paren)) { + parseParens(); + } else if (FormatTok->is(TT_AttributeSquare)) { + parseSquare(); + // Consume the closing TT_AttributeSquare. 
+ if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) + nextToken(); + } + } } // Note that parsing away template declarations here leads to incorrectly @@ -2226,6 +2522,12 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { } if (FormatTok->Tok.is(tok::semi)) return; + if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { + addUnwrappedLine(); + nextToken(); + parseCSharpGenericTypeConstraint(); + break; + } nextToken(); } } @@ -2451,8 +2753,8 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, E = Line.Tokens.end(); I != E; ++I) { llvm::dbgs() << I->Tok->Tok.getName() << "[" - << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn - << "] "; + << "T=" << I->Tok->getType() + << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2723,18 +3025,19 @@ void UnwrappedLineParser::readToken(int LevelDifference) { flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); } - while (FormatTok->Type == TT_ConflictStart || - FormatTok->Type == TT_ConflictEnd || - FormatTok->Type == TT_ConflictAlternative) { - if (FormatTok->Type == TT_ConflictStart) { + while (FormatTok->getType() == TT_ConflictStart || + FormatTok->getType() == TT_ConflictEnd || + FormatTok->getType() == TT_ConflictAlternative) { + if (FormatTok->getType() == TT_ConflictStart) { conditionalCompilationStart(/*Unreachable=*/false); - } else if (FormatTok->Type == TT_ConflictAlternative) { + } else if (FormatTok->getType() == TT_ConflictAlternative) { conditionalCompilationAlternative(); - } else if (FormatTok->Type == TT_ConflictEnd) { + } else if (FormatTok->getType() == TT_ConflictEnd) { conditionalCompilationEnd(); } FormatTok = Tokens->getNextToken(); FormatTok->MustBreakBefore = true; + FormatTok->MustBreakAlignBefore = true; } if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && @@ -2759,6 +3062,7 @@ void 
UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { Line->Tokens.back().Tok->MustBreakBefore = true; + Line->Tokens.back().Tok->MustBreakAlignBefore = true; MustBreakBeforeNextToken = false; } } diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 5d9bafc429a7..8b3aa4c84edb 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -98,7 +98,7 @@ private: void readTokenWithJavaScriptASI(); void parseStructuralElement(); bool tryToParseBracedList(); - bool parseBracedList(bool ContinueOnSemicolons = false, + bool parseBracedList(bool ContinueOnSemicolons = false, bool IsEnum = false, tok::TokenKind ClosingBraceKind = tok::r_brace); void parseParens(); void parseSquare(bool LambdaIntroducer = false); @@ -125,9 +125,16 @@ private: bool parseObjCProtocol(); void parseJavaScriptEs6ImportExport(); void parseStatementMacro(); + void parseCSharpAttribute(); + // Parse a C# generic type constraint: `where T : IComparable<T>`. 
+ // See: + // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint + void parseCSharpGenericTypeConstraint(); bool tryToParseLambda(); bool tryToParseLambdaIntroducer(); + bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); + bool tryToParseSimpleAttribute(); void addUnwrappedLine(); bool eof() const; // LevelDifference is the difference of levels after and before the current diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 5a44500d355f..32e0b685ea0f 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -30,28 +30,29 @@ WhitespaceManager::Change::Change(const FormatToken &Tok, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, + StringRef CurrentLinePrefix, bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken) : Tok(&Tok), CreateReplacement(CreateReplacement), OriginalWhitespaceRange(OriginalWhitespaceRange), StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), + CurrentLinePrefix(CurrentLinePrefix), IsAligned(IsAligned), ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), - StartOfBlockComment(nullptr), IndentationOffset(0) {} + StartOfBlockComment(nullptr), IndentationOffset(0), ConditionalsLevel(0) { +} void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, - bool InPPDirective) { + bool IsAligned, bool InPPDirective) { if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? 
FD_Break : FD_Continue; Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, Spaces, StartOfTokenColumn, Newlines, "", "", - InPPDirective && !Tok.IsFirst, + IsAligned, InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/false)); } @@ -62,7 +63,7 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, Changes.push_back(Change(Tok, /*CreateReplacement=*/false, Tok.WhitespaceRange, /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - InPPDirective && !Tok.IsFirst, + /*IsAligned=*/false, InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/false)); } @@ -82,7 +83,8 @@ void WhitespaceManager::replaceWhitespaceInToken( Change(Tok, /*CreateReplacement=*/true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces, std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix, - InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true)); + /*IsAligned=*/true, InPPDirective && !Tok.IsFirst, + /*IsInsideToken=*/true)); } const tooling::Replacements &WhitespaceManager::generateReplacements() { @@ -93,7 +95,9 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { calculateLineBreakInformation(); alignConsecutiveMacros(); alignConsecutiveDeclarations(); + alignConsecutiveBitFields(); alignConsecutiveAssignments(); + alignChainedConditionals(); alignTrailingComments(); alignEscapedNewlines(); generateChanges(); @@ -226,6 +230,33 @@ void WhitespaceManager::calculateLineBreakInformation() { LastBlockComment = nullptr; } } + + // Compute conditional nesting level + // Level is increased for each conditional, unless this conditional continues + // a chain of conditionals, i.e. starts immediately after the colon of another + // conditional. 
+ SmallVector<bool, 16> ScopeStack; + int ConditionalsLevel = 0; + for (auto &Change : Changes) { + for (unsigned i = 0, e = Change.Tok->FakeLParens.size(); i != e; ++i) { + bool isNestedConditional = + Change.Tok->FakeLParens[e - 1 - i] == prec::Conditional && + !(i == 0 && Change.Tok->Previous && + Change.Tok->Previous->is(TT_ConditionalExpr) && + Change.Tok->Previous->is(tok::colon)); + if (isNestedConditional) + ++ConditionalsLevel; + ScopeStack.push_back(isNestedConditional); + } + + Change.ConditionalsLevel = ConditionalsLevel; + + for (unsigned i = Change.Tok->FakeRParens; i > 0 && ScopeStack.size(); + --i) { + if (ScopeStack.pop_back_val()) + --ConditionalsLevel; + } + } } // Align a single sequence of tokens, see AlignTokens below. @@ -247,6 +278,7 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, // double z); // In the above example, we need to take special care to ensure that // 'double z' is indented along with its owning function 'b'. + // Special handling is required for 'nested' ternary operators. SmallVector<unsigned, 16> ScopeStack; for (unsigned i = Start; i != End; ++i) { @@ -287,7 +319,10 @@ AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, unsigned ScopeStart = ScopeStack.back(); if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || (ScopeStart > Start + 1 && - Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)) || + Changes[i].Tok->is(TT_ConditionalExpr) || + (Changes[i].Tok->Previous && + Changes[i].Tok->Previous->is(TT_ConditionalExpr))) Changes[i].Spaces += Shift; } @@ -340,7 +375,7 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, // abort when we hit any token in a higher scope than the starting one. auto IndentAndNestingLevel = StartAt < Changes.size() ? 
Changes[StartAt].indentAndNestingLevel() - : std::pair<unsigned, unsigned>(0, 0); + : std::tuple<unsigned, unsigned, unsigned>(); // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -376,9 +411,11 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; - // If there is a blank line, or if the last line didn't contain any - // matching token, the sequence ends here. - if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine) + // If there is a blank line, there is a forced-align-break (eg, + // preprocessor), or if the last line didn't contain any matching token, + // the sequence ends here. + if (Changes[i].NewlinesBefore > 1 || + Changes[i].Tok->MustBreakAlignBefore || !FoundMatchOnLine) AlignCurrentSequence(); FoundMatchOnLine = false; @@ -408,9 +445,17 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, StartOfSequence = i; unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - int LineLengthAfter = -Changes[i].Spaces; - for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j) - LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength; + int LineLengthAfter = Changes[i].TokenLength; + for (unsigned j = i + 1; j != e && Changes[j].NewlinesBefore == 0; ++j) { + LineLengthAfter += Changes[j].Spaces; + // Changes are generally 1:1 with the tokens, but a change could also be + // inside of a token, in which case it's counted more than once: once for + // the whitespace surrounding the token (!IsInsideToken) and once for + // each whitespace change within it (IsInsideToken). + // Therefore, changes inside of a token should only count the space. 
+ if (!Changes[j].IsInsideToken) + LineLengthAfter += Changes[j].TokenLength; + } unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter; // If we are restricted by the maximum column width, end the sequence. @@ -573,6 +618,26 @@ void WhitespaceManager::alignConsecutiveAssignments() { Changes, /*StartAt=*/0); } +void WhitespaceManager::alignConsecutiveBitFields() { + if (!Style.AlignConsecutiveBitFields) + return; + + AlignTokens( + Style, + [&](Change const &C) { + // Do not align on ':' that is first on a line. + if (C.NewlinesBefore > 0) + return false; + + // Do not align on ':' that is last on a line. + if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) + return false; + + return C.Tok->is(TT_BitFieldColon); + }, + Changes, /*StartAt=*/0); +} + void WhitespaceManager::alignConsecutiveDeclarations() { if (!Style.AlignConsecutiveDeclarations) return; @@ -607,6 +672,50 @@ void WhitespaceManager::alignConsecutiveDeclarations() { Changes, /*StartAt=*/0); } +void WhitespaceManager::alignChainedConditionals() { + if (Style.BreakBeforeTernaryOperators) { + AlignTokens( + Style, + [](Change const &C) { + // Align question operators and last colon + return C.Tok->is(TT_ConditionalExpr) && + ((C.Tok->is(tok::question) && !C.NewlinesBefore) || + (C.Tok->is(tok::colon) && C.Tok->Next && + (C.Tok->Next->FakeLParens.size() == 0 || + C.Tok->Next->FakeLParens.back() != prec::Conditional))); + }, + Changes, /*StartAt=*/0); + } else { + static auto AlignWrappedOperand = [](Change const &C) { + auto Previous = C.Tok->getPreviousNonComment(); // Previous; + return C.NewlinesBefore && Previous && Previous->is(TT_ConditionalExpr) && + (Previous->is(tok::question) || + (Previous->is(tok::colon) && + (C.Tok->FakeLParens.size() == 0 || + C.Tok->FakeLParens.back() != prec::Conditional))); + }; + // Ensure we keep alignment of wrapped operands with non-wrapped operands + // Since we actually align the operators, the wrapped operands need the + // extra offset to be 
properly aligned. + for (Change &C : Changes) { + if (AlignWrappedOperand(C)) + C.StartOfTokenColumn -= 2; + } + AlignTokens( + Style, + [this](Change const &C) { + // Align question operators if next operand is not wrapped, as + // well as wrapped operands after question operator or last + // colon in conditional sequence + return (C.Tok->is(TT_ConditionalExpr) && C.Tok->is(tok::question) && + &C != &Changes.back() && (&C + 1)->NewlinesBefore == 0 && + !(&C + 1)->IsTrailingComment) || + AlignWrappedOperand(C); + }, + Changes, /*StartAt=*/0); + } +} + void WhitespaceManager::alignTrailingComments() { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -617,6 +726,8 @@ void WhitespaceManager::alignTrailingComments() { if (Changes[i].StartOfBlockComment) continue; Newlines += Changes[i].NewlinesBefore; + if (Changes[i].Tok->MustBreakAlignBefore) + BreakBeforeNext = true; if (!Changes[i].IsTrailingComment) continue; @@ -761,9 +872,9 @@ void WhitespaceManager::generateChanges() { C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.Tok->IndentLevel, - std::max(0, C.Spaces), - C.StartOfTokenColumn - std::max(0, C.Spaces)); + appendIndentText( + ReplacementText, C.Tok->IndentLevel, std::max(0, C.Spaces), + C.StartOfTokenColumn - std::max(0, C.Spaces), C.IsAligned); ReplacementText.append(C.CurrentLinePrefix); storeReplacement(C.OriginalWhitespaceRange, ReplacementText); } @@ -809,7 +920,8 @@ void WhitespaceManager::appendEscapedNewlineText( void WhitespaceManager::appendIndentText(std::string &Text, unsigned IndentLevel, unsigned Spaces, - unsigned WhitespaceStartColumn) { + unsigned WhitespaceStartColumn, + bool IsAligned) { switch (Style.UseTab) { case FormatStyle::UT_Never: Text.append(Spaces, ' '); @@ -838,28 +950,39 @@ void WhitespaceManager::appendIndentText(std::string &Text, case FormatStyle::UT_ForIndentation: if (WhitespaceStartColumn == 0) { unsigned Indentation = IndentLevel * 
Style.IndentWidth; - // This happens, e.g. when a line in a block comment is indented less than - // the first one. - if (Indentation > Spaces) - Indentation = Spaces; - if (Style.TabWidth) { - unsigned Tabs = Indentation / Style.TabWidth; - Text.append(Tabs, '\t'); - Spaces -= Tabs * Style.TabWidth; - } + Spaces = appendTabIndent(Text, Spaces, Indentation); } Text.append(Spaces, ' '); break; case FormatStyle::UT_ForContinuationAndIndentation: - if (WhitespaceStartColumn == 0 && Style.TabWidth) { - unsigned Tabs = Spaces / Style.TabWidth; - Text.append(Tabs, '\t'); - Spaces -= Tabs * Style.TabWidth; + if (WhitespaceStartColumn == 0) + Spaces = appendTabIndent(Text, Spaces, Spaces); + Text.append(Spaces, ' '); + break; + case FormatStyle::UT_AlignWithSpaces: + if (WhitespaceStartColumn == 0) { + unsigned Indentation = + IsAligned ? IndentLevel * Style.IndentWidth : Spaces; + Spaces = appendTabIndent(Text, Spaces, Indentation); } Text.append(Spaces, ' '); break; } } +unsigned WhitespaceManager::appendTabIndent(std::string &Text, unsigned Spaces, + unsigned Indentation) { + // This happens, e.g. when a line in a block comment is indented less than the + // first one. + if (Indentation > Spaces) + Indentation = Spaces; + if (Style.TabWidth) { + unsigned Tabs = Indentation / Style.TabWidth; + Text.append(Tabs, '\t'); + Spaces -= Tabs * Style.TabWidth; + } + return Spaces; +} + } // namespace format } // namespace clang diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h index f47bf40204b3..1398a3aee2b8 100644 --- a/clang/lib/Format/WhitespaceManager.h +++ b/clang/lib/Format/WhitespaceManager.h @@ -19,6 +19,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include <string> +#include <tuple> namespace clang { namespace format { @@ -49,7 +50,7 @@ public: /// this replacement. It is needed for determining how \p Spaces is turned /// into tabs and spaces for some format styles. 
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, - unsigned StartOfTokenColumn, + unsigned StartOfTokenColumn, bool isAligned = false, bool InPPDirective = false); /// Adds information about an unchangeable token's whitespace. @@ -109,7 +110,7 @@ public: SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, - bool ContinuesPPDirective, bool IsInsideToken); + bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); // The kind of the token whose whitespace this change replaces, or in which // this change inserts whitespace. @@ -125,6 +126,7 @@ public: unsigned NewlinesBefore; std::string PreviousLinePostfix; std::string CurrentLinePrefix; + bool IsAligned; bool ContinuesPPDirective; // The number of spaces in front of the token or broken part of the token. @@ -157,11 +159,16 @@ public: const Change *StartOfBlockComment; int IndentationOffset; - // A combination of indent level and nesting level, which are used in - // tandem to compute lexical scope, for the purposes of deciding + // Depth of conditionals. Computed from tracking fake parenthesis, except + // it does not increase the indent for "chained" conditionals. + int ConditionalsLevel; + + // A combination of indent, nesting and conditionals levels, which are used + // in tandem to compute lexical scope, for the purposes of deciding // when to stop consecutive alignment runs. - std::pair<unsigned, unsigned> indentAndNestingLevel() const { - return std::make_pair(Tok->IndentLevel, Tok->NestingLevel); + std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { + return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel, + ConditionalsLevel); } }; @@ -177,9 +184,15 @@ private: /// Align consecutive assignments over all \c Changes. void alignConsecutiveAssignments(); + /// Align consecutive bitfields over all \c Changes. 
+ void alignConsecutiveBitFields(); + /// Align consecutive declarations over all \c Changes. void alignConsecutiveDeclarations(); + /// Align chained conditionals (ternary operators) over all \c Changes. + void alignChainedConditionals(); + /// Align trailing comments over all \c Changes. void alignTrailingComments(); @@ -204,7 +217,10 @@ private: unsigned PreviousEndOfTokenColumn, unsigned EscapedNewlineColumn); void appendIndentText(std::string &Text, unsigned IndentLevel, - unsigned Spaces, unsigned WhitespaceStartColumn); + unsigned Spaces, unsigned WhitespaceStartColumn, + bool IsAligned); + unsigned appendTabIndent(std::string &Text, unsigned Spaces, + unsigned Indentation); SmallVector<Change, 16> Changes; const SourceManager &SourceMgr; |