diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 18:47:56 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 18:47:56 +0000 |
commit | 5e20cdd81c44a443562a09007668ffdf76c455af (patch) | |
tree | dbbd4047878da71c1a706e26ce05b4e7791b14cc /lib/Format | |
parent | d5f23b0b7528b5c3caed1ba14f897cc4aaa9e3c3 (diff) |
Notes
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 18 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 184 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 292 | ||||
-rw-r--r-- | lib/Format/FormatToken.cpp | 55 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 194 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 433 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 22 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 941 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.h | 132 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 361 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.h | 16 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 100 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 7 |
13 files changed, 1788 insertions, 967 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 26f1371b4092e..66e935abdf558 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -106,7 +106,7 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); } - if (Chars > MaxSplit || Text.size() == Advance) + if (Chars > MaxSplit || Text.size() <= Advance) break; if (IsBlank(Text[0])) @@ -277,6 +277,8 @@ BreakableBlockComment::BreakableBlockComment( // If the last line is empty, the closing "*/" will have a star. if (i + 1 == e && Lines[i].empty()) break; + if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) + continue; while (!Lines[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); } @@ -297,14 +299,18 @@ BreakableBlockComment::BreakableBlockComment( } continue; } + // The first line already excludes the star. // For all other lines, adjust the line to exclude the star and // (optionally) the first whitespace. - StartOfLineColumn[i] += Decoration.size(); - Lines[i] = Lines[i].substr(Decoration.size()); - LeadingWhitespace[i] += Decoration.size(); - IndentAtLineBreak = - std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); + unsigned DecorationSize = + Decoration.startswith(Lines[i]) ? 
Lines[i].size() : Decoration.size(); + StartOfLineColumn[i] += DecorationSize; + Lines[i] = Lines[i].substr(DecorationSize); + LeadingWhitespace[i] += DecorationSize; + if (!Decoration.startswith(Lines[i])) + IndentAtLineBreak = + std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); } IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); DEBUG({ diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 4cc92b02a9e58..4e8f5af263d2f 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -143,11 +143,10 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || - (Style.BreakBeforeTernaryOperators && - (Current.is(tok::question) || - (Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)))) || + (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && + Previous.isNot(tok::question)) || (!Style.BreakBeforeTernaryOperators && - (Previous.is(tok::question) || Previous.is(TT_ConditionalExpr)))) && + Previous.is(TT_ConditionalExpr))) && State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() && !Current.isOneOf(tok::r_paren, tok::r_brace)) return true; @@ -166,10 +165,17 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) return true; + if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound && + State.Stack.back().BreakBeforeParameter) + return true; if (State.Column < getNewLineColumn(State)) return false; - if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None) { + + // Using CanBreakBefore here and below takes care of the decision whether the + // current style uses wrapping 
before or after operators for the given + // operator. + if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) { // If we need to break somewhere inside the LHS of a binary expression, we // should also break after the operator. Otherwise, the formatting would // hide the operator precedence, e.g. in: @@ -185,16 +191,13 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Previous.Previous->isNot(TT_BinaryOperator); // For >>. bool LHSIsBinaryExpr = Previous.Previous && Previous.Previous->EndsBinaryExpression; - if (Previous.is(TT_BinaryOperator) && (!IsComparison || LHSIsBinaryExpr) && - Current.isNot(TT_BinaryOperator) && // For >>. - !Current.isTrailingComment() && !Previous.is(tok::lessless) && + if ((!IsComparison || LHSIsBinaryExpr) && !Current.isTrailingComment() && Previous.getPrecedence() != prec::Assignment && State.Stack.back().BreakBeforeParameter) return true; - } else { - if (Current.is(TT_BinaryOperator) && Previous.EndsBinaryExpression && - State.Stack.back().BreakBeforeParameter) - return true; + } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore && + State.Stack.back().BreakBeforeParameter) { + return true; } // Same as above, but for the first "<<" operator. @@ -203,12 +206,14 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { State.Stack.back().FirstLessLess == 0) return true; - if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound && - State.Stack.back().BreakBeforeParameter) - return true; if (Current.NestingLevel == 0 && !Current.isTrailingComment()) { + // Always break after "template <...>" and leading annotations. This is only + // for cases where the entire line does not fit on a single line as a + // different LineFormatter would be used otherwise. 
if (Previous.ClosesTemplateDeclaration) return true; + if (Previous.is(TT_FunctionAnnotationRParen)) + return true; if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) && Current.isNot(TT_LeadingJavaAnnotation)) return true; @@ -221,8 +226,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (startsSegmentOfBuilderTypeCall(Current) && (State.Stack.back().CallContinuation != 0 || - (State.Stack.back().BreakBeforeParameter && - State.Stack.back().ContainsUnwrappedBuilder))) + State.Stack.back().BreakBeforeParameter)) return true; // The following could be precomputed as they do not depend on the state. @@ -232,6 +236,10 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment)) return true; + if (Current.is(tok::lessless) && Previous.is(tok::identifier) && + Previous.TokenText == "endl") + return true; + return false; } @@ -245,12 +253,18 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, (Current.Previous->Tok.getIdentifierInfo() == nullptr || Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_not_keyword))) { - // FIXME: Is this correct? - int WhitespaceLength = SourceMgr.getSpellingColumnNumber( - State.NextToken->WhitespaceRange.getEnd()) - - SourceMgr.getSpellingColumnNumber( - State.NextToken->WhitespaceRange.getBegin()); - State.Column += WhitespaceLength; + unsigned EndColumn = + SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd()); + if (Current.LastNewlineOffset != 0) { + // If there is a newline within this token, the final column will solely + // determined by the current end column. 
+ State.Column = EndColumn; + } else { + unsigned StartColumn = + SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin()); + assert(EndColumn >= StartColumn); + State.Column += EndColumn - StartColumn; + } moveStateToNextToken(State, DryRun, /*Newline=*/false); return 0; } @@ -297,7 +311,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, else if (State.Stack.back().Indent + Current.LongestObjCSelectorName > State.Column + Spaces + Current.ColumnWidth) State.Stack.back().ColonPos = - State.Stack.back().Indent + Current.LongestObjCSelectorName; + std::max(State.FirstIndent + Style.ContinuationIndentWidth, + State.Stack.back().Indent) + + Current.LongestObjCSelectorName; else State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth; } @@ -308,9 +324,12 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, State.Stack.back().Indent = State.Column + Spaces; if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) State.Stack.back().NoLineBreak = true; - if (startsSegmentOfBuilderTypeCall(Current)) + if (startsSegmentOfBuilderTypeCall(Current) && + State.Column > getNewLineColumn(State)) State.Stack.back().ContainsUnwrappedBuilder = true; + if (Current.is(TT_LambdaArrow)) + State.Stack.back().NoLineBreak = true; if (Current.isMemberAccess() && Previous.is(tok::r_paren) && (Previous.MatchingParen && (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) { @@ -359,7 +378,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); HasTrailingCall = Next && Next->isMemberAccess(); } - if (HasTrailingCall && + if (HasTrailingCall && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].CallContinuation == 0) State.Stack.back().LastSpace = State.Column; } @@ -406,7 +425,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, 
State.Stack.back().AlignColons = false; } else { State.Stack.back().ColonPos = - State.Stack.back().Indent + NextNonComment->LongestObjCSelectorName; + (Style.IndentWrappedFunctionNames + ? std::max(State.Stack.back().Indent, + State.FirstIndent + Style.ContinuationIndentWidth) + : State.Stack.back().Indent) + + NextNonComment->LongestObjCSelectorName; } } else if (State.Stack.back().AlignColons && State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) { @@ -468,8 +491,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, !PreviousNonComment->isOneOf(tok::comma, tok::semi) && (PreviousNonComment->isNot(TT_TemplateCloser) || Current.NestingLevel != 0) && - !PreviousNonComment->isOneOf(TT_BinaryOperator, TT_JavaAnnotation, - TT_LeadingJavaAnnotation) && + !PreviousNonComment->isOneOf( + TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation, + TT_LeadingJavaAnnotation) && Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope()) State.Stack.back().BreakBeforeParameter = true; @@ -516,7 +540,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block) return Current.NestingLevel == 0 ? 
State.FirstIndent : State.Stack.back().Indent; - if (Current.isOneOf(tok::r_brace, tok::r_square)) { + if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) { if (Current.closesBlockTypeList(Style)) return State.Stack[State.Stack.size() - 2].NestedBlockIndent; if (Current.MatchingParen && @@ -529,6 +553,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack.back().Indent; if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) return State.StartOfStringLiteral; + if (NextNonComment->is(TT_ObjCStringLiteral) && + State.StartOfStringLiteral != 0) + return State.StartOfStringLiteral - 1; if (NextNonComment->is(tok::lessless) && State.Stack.back().FirstLessLess != 0) return State.Stack.back().FirstLessLess; @@ -546,8 +573,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack.back().VariablePos; if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || - PreviousNonComment->isOneOf(TT_AttributeParen, TT_JavaAnnotation, - TT_LeadingJavaAnnotation))) || + PreviousNonComment->isOneOf( + TT_AttributeParen, TT_FunctionAnnotationRParen, TT_JavaAnnotation, + TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent); @@ -555,7 +583,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (!State.Stack.back().ObjCSelectorNameFound) { if (NextNonComment->LongestObjCSelectorName == 0) return State.Stack.back().Indent; - return State.Stack.back().Indent + + return (Style.IndentWrappedFunctionNames + ? 
std::max(State.Stack.back().Indent, + State.FirstIndent + Style.ContinuationIndentWidth) + : State.Stack.back().Indent) + NextNonComment->LongestObjCSelectorName - NextNonComment->ColumnWidth; } @@ -570,10 +601,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack.back().StartOfArraySubscripts; return ContinuationIndent; } - if (NextNonComment->is(TT_StartOfName) || - Previous.isOneOf(tok::coloncolon, tok::equal)) { + + // This ensure that we correctly format ObjC methods calls without inputs, + // i.e. where the last element isn't selector like: [callee method]; + if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 && + NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) + return State.Stack.back().Indent; + + if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) || + Previous.isOneOf(tok::coloncolon, tok::equal)) return ContinuationIndent; - } if (PreviousNonComment && PreviousNonComment->is(tok::colon) && PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) return ContinuationIndent; @@ -621,7 +658,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, std::min(State.LowestLevelOnLine, Current.NestingLevel); if (Current.isMemberAccess()) State.Stack.back().StartOfFunctionCall = - Current.LastOperator ? 0 : State.Column + Current.ColumnWidth; + Current.LastOperator ? 0 : State.Column; if (Current.is(TT_SelectorName)) State.Stack.back().ObjCSelectorNameFound = true; if (Current.is(TT_CtorInitializerColon)) { @@ -637,12 +674,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; } - - // In ObjC method declaration we align on the ":" of parameters, but we need - // to ensure that we indent parameters on subsequent lines by at least our - // continuation indent width. 
- if (Current.is(TT_ObjCMethodSpecifier)) - State.Stack.back().Indent += Style.ContinuationIndentWidth; + if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) + State.Stack.back().NestedBlockIndent = + State.Column + Current.ColumnWidth + 1; // Insert scopes created by fake parenthesis. const FormatToken *Previous = Current.getPreviousNonComment(); @@ -675,12 +709,13 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, moveStatePastScopeCloser(State); moveStatePastFakeRParens(State); - if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { + if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column; - } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && - !Current.isStringLiteral()) { + if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column + 1; + else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && + !Current.isStringLiteral()) State.StartOfStringLiteral = 0; - } State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; @@ -712,7 +747,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // 'return', assignments or opening <({[. The indentation for these cases // is special cased. 
bool SkipFirstExtraIndent = - (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) || + (Previous && (Previous->opensScope() || + Previous->isOneOf(tok::semi, tok::kw_return) || (Previous->getPrecedence() == prec::Assignment && Style.AlignOperands) || Previous->is(TT_ObjCMethodExpr))); @@ -783,7 +819,6 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) { for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) { unsigned VariablePos = State.Stack.back().VariablePos; - assert(State.Stack.size() > 1); if (State.Stack.size() == 1) { // Do not pop the last element. break; @@ -806,6 +841,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, unsigned NewIndent; unsigned NewIndentLevel = State.Stack.back().IndentLevel; + unsigned LastSpace = State.Stack.back().LastSpace; bool AvoidBinPacking; bool BreakBeforeParameter = false; if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { @@ -815,17 +851,28 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, ++NewIndentLevel; } else { NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; - NewIndent = std::min(State.Column + 1, NewIndent); } const FormatToken *NextNoComment = Current.getNextNonComment(); AvoidBinPacking = Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || !Style.BinPackParameters || + Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments || (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod)); } else { NewIndent = Style.ContinuationIndentWidth + std::max(State.Stack.back().LastSpace, State.Stack.back().StartOfFunctionCall); + + // Ensure that different different brackets force relative alignment, e.g.: + // void SomeFunction(vector< // break + // int> v); + // FIXME: We likely want to do this for more combinations of brackets. 
+ // Verify that it is wanted for ObjC, too. + if (Current.Tok.getKind() == tok::less && + Current.ParentBracket == tok::l_paren) { + NewIndent = std::max(NewIndent, State.Stack.back().Indent); + LastSpace = std::max(LastSpace, State.Stack.back().Indent); + } + AvoidBinPacking = (State.Line->MustBeDeclaration && !Style.BinPackParameters) || (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || @@ -833,19 +880,33 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, (Current.PackingKind == PPK_OnePerLine || (!BinPackInconclusiveFunctions && Current.PackingKind == PPK_Inconclusive))); - // If this '[' opens an ObjC call, determine whether all parameters fit - // into one line and put one per line if they don't. - if (Current.is(TT_ObjCMethodExpr) && Style.ColumnLimit != 0 && - getLengthToMatchingParen(Current) + State.Column > + if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) { + if (Style.ColumnLimit) { + // If this '[' opens an ObjC call, determine whether all parameters fit + // into one line and put one per line if they don't. + if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit(State)) - BreakBeforeParameter = true; + BreakBeforeParameter = true; + } else { + // For ColumnLimit = 0, we have to figure out whether there is or has to + // be a line break within this call. 
+ for (const FormatToken *Tok = &Current; + Tok && Tok != Current.MatchingParen; Tok = Tok->Next) { + if (Tok->MustBreakBefore || + (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) { + BreakBeforeParameter = true; + break; + } + } + } + } } bool NoLineBreak = State.Stack.back().NoLineBreak || (Current.is(TT_TemplateOpener) && State.Stack.back().ContainsUnwrappedBuilder); - unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent; - State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, - State.Stack.back().LastSpace, + unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall, + State.Stack.back().NestedBlockIndent); + State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; @@ -1082,8 +1143,9 @@ bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { if (Current.getNextNonComment() && Current.getNextNonComment()->isStringLiteral()) return true; // Implicit concatenation. - if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > - Style.ColumnLimit) + if (Style.ColumnLimit != 0 && + State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > + Style.ColumnLimit) return true; // String will be split. 
return false; } diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 2a4721f2b3b7d..10c68f9da6174 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -109,10 +109,8 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { } }; -template <> -struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { - static void enumeration(IO &IO, - FormatStyle::PointerAlignmentStyle &Value) { +template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { + static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); IO.enumCase(Value, "Left", FormatStyle::PAS_Left); IO.enumCase(Value, "Right", FormatStyle::PAS_Right); @@ -144,8 +142,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("Language", Style.Language); if (IO.outputting()) { - StringRef StylesArray[] = { "LLVM", "Google", "Chromium", - "Mozilla", "WebKit", "GNU" }; + StringRef StylesArray[] = {"LLVM", "Google", "Chromium", + "Mozilla", "WebKit", "GNU"}; ArrayRef<StringRef> Styles(StylesArray); for (size_t i = 0, e = Styles.size(); i < e; ++i) { StringRef StyleName(Styles[i]); @@ -176,6 +174,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); IO.mapOptional("AlignOperands", Style.AlignOperands); IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); + IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); IO.mapOptional("AllowShortBlocksOnASingleLine", @@ -273,7 +272,7 @@ template <> struct MappingTraits<FormatStyle> { // will be used to get default values for missing keys. // If the first element has no Language specified, it will be treated as the // default one for the following elements. 
-template <> struct DocumentListTraits<std::vector<FormatStyle> > { +template <> struct DocumentListTraits<std::vector<FormatStyle>> { static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { return Seq.size(); } @@ -331,6 +330,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.AlignAfterOpenBracket = true; LLVMStyle.AlignOperands = true; LLVMStyle.AlignTrailingComments = true; + LLVMStyle.AlignConsecutiveAssignments = false; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = false; @@ -600,10 +600,10 @@ public: FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), - Style(Style), IdentTable(getFormattingLangOpts(Style)), - Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), - FormattingDisabled(false) { + LessStashed(false), Column(0), TrailingWhitespace(0), + SourceMgr(SourceMgr), ID(ID), Style(Style), + IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), + Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts(Style))); Lex->SetKeepWhitespaceMode(true); @@ -619,7 +619,7 @@ public: do { Tokens.push_back(getNextToken()); tryMergePreviousTokens(); - if (Tokens.back()->NewlinesBefore > 0) + if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); return Tokens; @@ -633,32 +633,62 @@ private: return; if (tryMergeConflictMarkers()) return; + if (tryMergeLessLess()) + return; if (Style.Language == FormatStyle::LK_JavaScript) { if (tryMergeJSRegexLiteral()) return; if (tryMergeEscapeSequence()) return; + if (tryMergeTemplateString()) + return; - static tok::TokenKind 
JSIdentity[] = { tok::equalequal, tok::equal }; - static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; - static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, - tok::greaterequal }; - static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater }; - // FIXME: We probably need to change token type to mimic operator with the - // correct priority. - if (tryMergeTokens(JSIdentity)) + static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; + static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, + tok::equal}; + static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, + tok::greaterequal}; + static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; + // FIXME: Investigate what token type gives the correct operator priority. + if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) return; - if (tryMergeTokens(JSNotIdentity)) + if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) return; - if (tryMergeTokens(JSShiftEqual)) + if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) return; - if (tryMergeTokens(JSRightArrow)) + if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) return; } } - bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) { + bool tryMergeLessLess() { + // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. + if (Tokens.size() < 3) + return false; + + bool FourthTokenIsLess = false; + if (Tokens.size() > 3) + FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); + + auto First = Tokens.end() - 3; + if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || + First[0]->isNot(tok::less) || FourthTokenIsLess) + return false; + + // Only merge if there currently is no whitespace between the two "<". 
+ if (First[1]->WhitespaceRange.getBegin() != + First[1]->WhitespaceRange.getEnd()) + return false; + + First[0]->Tok.setKind(tok::lessless); + First[0]->TokenText = "<<"; + First[0]->ColumnWidth += 1; + Tokens.erase(Tokens.end() - 2); + return true; + } + + bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) { if (Tokens.size() < Kinds.size()) return false; @@ -668,8 +698,9 @@ private: return false; unsigned AddLength = 0; for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != - First[i]->WhitespaceRange.getEnd()) + if (!First[i]->is(Kinds[i]) || + First[i]->WhitespaceRange.getBegin() != + First[i]->WhitespaceRange.getEnd()) return false; AddLength += First[i]->TokenText.size(); } @@ -677,6 +708,7 @@ private: First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; + First[0]->Type = NewType; return true; } @@ -720,7 +752,7 @@ private: unsigned LastColumn = Tokens.back()->OriginalColumn; for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { ++TokenCount; - if (I[0]->is(tok::slash) && I + 1 != E && + if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E && (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace, tok::exclaim, tok::l_square, tok::colon, tok::comma, tok::question, tok::kw_return) || @@ -745,6 +777,91 @@ private: return false; } + bool tryMergeTemplateString() { + if (Tokens.size() < 2) + return false; + + FormatToken *EndBacktick = Tokens.back(); + // Backticks get lexed as tok::unknown tokens. If a template string contains + // a comment start, it gets lexed as a tok::comment, or tok::unknown if + // unterminated. + if (!EndBacktick->isOneOf(tok::comment, tok::unknown)) + return false; + size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); + // Unknown token that's not actually a backtick, or a comment that doesn't + // contain a backtick. 
+ if (CommentBacktickPos == StringRef::npos) + return false; + + unsigned TokenCount = 0; + bool IsMultiline = false; + unsigned EndColumnInFirstLine = + EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; + for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { + ++TokenCount; + if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline) + IsMultiline = true; + + // If there was a preceding template string, this must be the start of a + // template string, not the end. + if (I[0]->is(TT_TemplateString)) + return false; + + if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { + // Keep track of the rhs offset of the last token to wrap across lines - + // its the rhs offset of the first line of the template string, used to + // determine its width. + if (I[0]->IsMultiline) + EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; + // If the token has newlines, the token before it (if it exists) is the + // rhs end of the previous line. + if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) + EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; + + continue; + } + + Tokens.resize(Tokens.size() - TokenCount); + Tokens.back()->Type = TT_TemplateString; + const char *EndOffset = + EndBacktick->TokenText.data() + 1 + CommentBacktickPos; + if (CommentBacktickPos != 0) { + // If the backtick was not the first character (e.g. in a comment), + // re-lex after the backtick position. + SourceLocation Loc = EndBacktick->Tok.getLocation(); + resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); + } + Tokens.back()->TokenText = + StringRef(Tokens.back()->TokenText.data(), + EndOffset - Tokens.back()->TokenText.data()); + + unsigned EndOriginalColumn = EndBacktick->OriginalColumn; + if (EndOriginalColumn == 0) { + SourceLocation Loc = EndBacktick->Tok.getLocation(); + EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); + } + // If the ` is further down within the token (e.g. in a comment). 
+ EndOriginalColumn += CommentBacktickPos; + + if (IsMultiline) { + // ColumnWidth is from backtick to last token in line. + // LastLineColumnWidth is 0 to backtick. + // x = `some content + // until here`; + Tokens.back()->ColumnWidth = + EndColumnInFirstLine - Tokens.back()->OriginalColumn; + Tokens.back()->LastLineColumnWidth = EndOriginalColumn; + Tokens.back()->IsMultiline = true; + } else { + // Token simply spans from start to end, +1 for the ` itself. + Tokens.back()->ColumnWidth = + EndOriginalColumn - Tokens.back()->OriginalColumn + 1; + } + return true; + } + return false; + } + bool tryMerge_TMacro() { if (Tokens.size() < 4) return false; @@ -772,6 +889,8 @@ private: String->OriginalColumn = Macro->OriginalColumn; String->ColumnWidth = encoding::columnWidthWithTabs( String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); + String->NewlinesBefore = Macro->NewlinesBefore; + String->HasUnescapedNewline = Macro->HasUnescapedNewline; Tokens.pop_back(); Tokens.pop_back(); @@ -842,21 +961,33 @@ private: return false; } + FormatToken *getStashedToken() { + // Create a synthesized second '>' or '<' token. + Token Tok = FormatTok->Tok; + StringRef TokenText = FormatTok->TokenText; + + unsigned OriginalColumn = FormatTok->OriginalColumn; + FormatTok = new (Allocator.Allocate()) FormatToken; + FormatTok->Tok = Tok; + SourceLocation TokLocation = + FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); + FormatTok->Tok.setLocation(TokLocation); + FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); + FormatTok->TokenText = TokenText; + FormatTok->ColumnWidth = 1; + FormatTok->OriginalColumn = OriginalColumn + 1; + + return FormatTok; + } + FormatToken *getNextToken() { if (GreaterStashed) { - // Create a synthesized second '>' token. - // FIXME: Increment Column and set OriginalColumn. 
- Token Greater = FormatTok->Tok; - FormatTok = new (Allocator.Allocate()) FormatToken; - FormatTok->Tok = Greater; - SourceLocation GreaterLocation = - FormatTok->Tok.getLocation().getLocWithOffset(1); - FormatTok->WhitespaceRange = - SourceRange(GreaterLocation, GreaterLocation); - FormatTok->TokenText = ">"; - FormatTok->ColumnWidth = 1; GreaterStashed = false; - return FormatTok; + return getStashedToken(); + } + if (LessStashed) { + LessStashed = false; + return getStashedToken(); } FormatTok = new (Allocator.Allocate()) FormatToken; @@ -869,20 +1000,32 @@ private: // Consume and record whitespace until we find a significant token. unsigned WhitespaceLength = TrailingWhitespace; while (FormatTok->Tok.is(tok::unknown)) { - for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) { - switch (FormatTok->TokenText[i]) { + StringRef Text = FormatTok->TokenText; + auto EscapesNewline = [&](int pos) { + // A '\r' here is just part of '\r\n'. Skip it. + if (pos >= 0 && Text[pos] == '\r') + --pos; + // See whether there is an odd number of '\' before this. + unsigned count = 0; + for (; pos >= 0; --pos, ++count) + if (Text[pos] != '\\') + break; + return count & 1; + }; + // FIXME: This miscounts tok:unknown tokens that are not just + // whitespace, e.g. a '`' character. + for (int i = 0, e = Text.size(); i != e; ++i) { + switch (Text[i]) { case '\n': ++FormatTok->NewlinesBefore; - // FIXME: This is technically incorrect, as it could also - // be a literal backslash at the end of the line. 
- if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' && - (FormatTok->TokenText[i - 1] != '\r' || i == 1 || - FormatTok->TokenText[i - 2] != '\\'))) - FormatTok->HasUnescapedNewline = true; + FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; Column = 0; break; case '\r': + FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; + Column = 0; + break; case '\f': case '\v': Column = 0; @@ -894,8 +1037,7 @@ private: Column += Style.TabWidth - Column % Style.TabWidth; break; case '\\': - if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && - FormatTok->TokenText[i + 1] != '\n')) + if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) FormatTok->Type = TT_ImplicitStringLiteral; break; default: @@ -920,6 +1062,7 @@ private: FormatTok->TokenText[1] == '\n') { ++FormatTok->NewlinesBefore; WhitespaceLength += 2; + FormatTok->LastNewlineOffset = 2; Column = 0; FormatTok->TokenText = FormatTok->TokenText.substr(2); } @@ -948,6 +1091,10 @@ private: FormatTok->Tok.setKind(tok::greater); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); GreaterStashed = true; + } else if (FormatTok->Tok.is(tok::lessless)) { + FormatTok->Tok.setKind(tok::less); + FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); + LessStashed = true; } // Now FormatTok is the next non-whitespace token. 
@@ -975,16 +1122,16 @@ private: Column = FormatTok->LastLineColumnWidth; } - FormatTok->IsForEachMacro = - std::binary_search(ForEachMacros.begin(), ForEachMacros.end(), - FormatTok->Tok.getIdentifierInfo()); + if (std::find(ForEachMacros.begin(), ForEachMacros.end(), + FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) + FormatTok->Type = TT_ForEachMacro; return FormatTok; } FormatToken *FormatTok; bool IsFirstToken; - bool GreaterStashed; + bool GreaterStashed, LessStashed; unsigned Column; unsigned TrailingWhitespace; std::unique_ptr<Lexer> Lex; @@ -1072,13 +1219,13 @@ public: << "\n"); } - tooling::Replacements format() { + tooling::Replacements format(bool *IncompleteFormat) { tooling::Replacements Result; FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding); UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); - bool StructuralError = Parser.parse(); + Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { @@ -1088,7 +1235,7 @@ public: AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); } tooling::Replacements RunResult = - format(AnnotatedLines, StructuralError, Tokens); + format(AnnotatedLines, Tokens, IncompleteFormat); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; for (tooling::Replacements::iterator I = RunResult.begin(), @@ -1107,7 +1254,7 @@ public: } tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - bool StructuralError, FormatTokenLexer &Tokens) { + FormatTokenLexer &Tokens, bool *IncompleteFormat) { TokenAnnotator Annotator(Style, Tokens.getKeywords()); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.annotate(*AnnotatedLines[i]); @@ -1122,8 +1269,9 @@ public: ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr, Whitespaces, Encoding, BinPackInconclusiveFunctions); - UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, 
Style); - Formatter.format(AnnotatedLines, /*DryRun=*/false); + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), + IncompleteFormat) + .format(AnnotatedLines); return Whitespaces.generateReplacements(); } @@ -1340,27 +1488,20 @@ private: } // end anonymous namespace -tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, - SourceManager &SourceMgr, - ArrayRef<CharSourceRange> Ranges) { - if (Style.DisableFormat) - return tooling::Replacements(); - return reformat(Style, SourceMgr, - SourceMgr.getFileID(Lex.getSourceLocation()), Ranges); -} - tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID, - ArrayRef<CharSourceRange> Ranges) { + ArrayRef<CharSourceRange> Ranges, + bool *IncompleteFormat) { if (Style.DisableFormat) return tooling::Replacements(); Formatter formatter(Style, SourceMgr, ID, Ranges); - return formatter.format(); + return formatter.format(IncompleteFormat); } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, - StringRef FileName) { + StringRef FileName, + bool *IncompleteFormat) { if (Style.DisableFormat) return tooling::Replacements(); @@ -1383,7 +1524,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return reformat(Style, SourceMgr, ID, CharRanges); + return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat); } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -1392,12 +1533,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 
0 : 1; LangOpts.LineComment = 1; - bool AlternativeOperators = Style.Language != FormatStyle::LK_JavaScript && - Style.Language != FormatStyle::LK_Java; + bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp; LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; LangOpts.Bool = 1; LangOpts.ObjC1 = 1; LangOpts.ObjC2 = 1; + LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. return LangOpts; } @@ -1415,7 +1556,8 @@ const char *StyleOptionHelpDescription = static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { if (FileName.endswith(".java")) { return FormatStyle::LK_Java; - } else if (FileName.endswith_lower(".js")) { + } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) { + // JavaScript or TypeScript. return FormatStyle::LK_JavaScript; } else if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) { diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index badb3a39c82c2..88678ca1abe1a 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -18,6 +18,7 @@ #include "clang/Format/Format.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" +#include <climits> namespace clang { namespace format { @@ -59,12 +60,13 @@ void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} unsigned CommaSeparatedList::formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) { - if (!State.NextToken->Previous || !State.NextToken->Previous->Previous) + if (State.NextToken == nullptr || !State.NextToken->Previous) return 0; // Ensure that we start on the opening brace. 
- const FormatToken *LBrace = State.NextToken->Previous->Previous; - if (LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block || + const FormatToken *LBrace = + State.NextToken->Previous->getPreviousNonComment(); + if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || LBrace->Next->Type == TT_DesignatedInitializerPeriod) return 0; @@ -132,9 +134,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { return; // In C++11 braced list style, we should not format in columns unless they - // have many items (20 or more) or we allow bin-packing of function - // parameters. - if (Style.Cpp11BracedListStyle && !Style.BinPackParameters && + // have many items (20 or more) or we allow bin-packing of function call + // arguments. + if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && Commas.size() < 19) return; @@ -143,19 +145,21 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { return; FormatToken *ItemBegin = Token->Next; + while (ItemBegin->isTrailingComment()) + ItemBegin = ItemBegin->Next; SmallVector<bool, 8> MustBreakBeforeItem; // The lengths of an item if it is put at the end of the line. This includes // trailing comments which are otherwise ignored for column alignment. SmallVector<unsigned, 8> EndOfLineItemLength; - unsigned MinItemLength = Style.ColumnLimit; - unsigned MaxItemLength = 0; - + bool HasSeparatingComment = false; for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { // Skip comments on their own line. 
- while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) + while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { ItemBegin = ItemBegin->Next; + HasSeparatingComment = i > 0; + } MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); if (ItemBegin->is(tok::l_brace)) @@ -178,8 +182,6 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ItemEnd = Commas[i]; // The comma is counted as part of the item when calculating the length. ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); - MinItemLength = std::min(MinItemLength, ItemLengths.back()); - MaxItemLength = std::max(MaxItemLength, ItemLengths.back()); // Consume trailing comments so the are included in EndOfLineItemLength. if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && @@ -194,12 +196,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ItemBegin = ItemEnd->Next; } - // If this doesn't have a nested list, we require at least 6 elements in order - // create a column layout. If it has a nested list, column layout ensures one - // list element per line. If the difference between the shortest and longest - // element is too large, column layout would create too much whitespace. - if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0 || - MaxItemLength - MinItemLength > 10) + // Don't use column layout for nested lists, lists with few elements and in + // presence of separating comments. 
+ if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment) return; // We can never place more than ColumnLimit / 3 items in a row (because of the @@ -208,6 +207,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { ColumnFormat Format; Format.Columns = Columns; Format.ColumnSizes.resize(Columns); + std::vector<unsigned> MinSizeInColumn(Columns, UINT_MAX); Format.LineCount = 1; bool HasRowWithSufficientColumns = false; unsigned Column = 0; @@ -219,9 +219,10 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { } if (Column == Columns - 1) HasRowWithSufficientColumns = true; - unsigned length = + unsigned Length = (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; - Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], length); + Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); + MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); ++Column; } // If all rows are terminated early (e.g. by trailing comments), we don't @@ -229,9 +230,19 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { if (!HasRowWithSufficientColumns) break; Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. - for (unsigned i = 0; i < Columns; ++i) { + + for (unsigned i = 0; i < Columns; ++i) Format.TotalWidth += Format.ColumnSizes[i]; - } + + // Don't use this Format, if the difference between the longest and shortest + // element in a column exceeds a threshold to avoid excessive spaces. + if ([&] { + for (unsigned i = 0; i < Columns - 1; ++i) + if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) + return true; + return false; + }()) + continue; // Ignore layouts that are bound to violate the column limit. 
if (Format.TotalWidth > Style.ColumnLimit) diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 4811e02dd2282..ec0fdf4aa813b 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -41,13 +41,19 @@ enum TokenType { TT_CtorInitializerComma, TT_DesignatedInitializerPeriod, TT_DictLiteral, + TT_ForEachMacro, + TT_FunctionAnnotationRParen, TT_FunctionDeclarationName, TT_FunctionLBrace, TT_FunctionTypeLParen, TT_ImplicitStringLiteral, TT_InheritanceColon, + TT_InlineASMBrace, TT_InlineASMColon, TT_JavaAnnotation, + TT_JsFatArrow, + TT_JsTypeColon, + TT_JsTypeOptionalQuestion, TT_LambdaArrow, TT_LambdaLSquare, TT_LeadingJavaAnnotation, @@ -59,6 +65,7 @@ enum TokenType { TT_ObjCMethodExpr, TT_ObjCMethodSpecifier, TT_ObjCProperty, + TT_ObjCStringLiteral, TT_OverloadedOperator, TT_OverloadedOperatorLParen, TT_PointerOrReference, @@ -69,6 +76,7 @@ enum TokenType { TT_StartOfName, TT_TemplateCloser, TT_TemplateOpener, + TT_TemplateString, TT_TrailingAnnotation, TT_TrailingReturnArrow, TT_TrailingUnaryOperator, @@ -102,21 +110,7 @@ class AnnotatedLine; /// \brief A wrapper around a \c Token storing information about the /// whitespace characters preceding it. 
struct FormatToken { - FormatToken() - : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), - ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false), - IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), - BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), - CanBreakBefore(false), ClosesTemplateDeclaration(false), - ParameterCount(0), BlockParameterCount(0), - PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), - BindingStrength(0), NestingLevel(0), SplitPenalty(0), - LongestObjCSelectorName(0), FakeRParens(0), - StartsBinaryExpression(false), EndsBinaryExpression(false), - OperatorIndex(0), LastOperator(false), - PartOfMultiVariableDeclStmt(false), IsForEachMacro(false), - MatchingParen(nullptr), Previous(nullptr), Next(nullptr), - Decision(FD_Unformatted), Finalized(false) {} + FormatToken() {} /// \brief The \c Token. Token Tok; @@ -125,48 +119,39 @@ struct FormatToken { /// /// This can be used to determine what the user wrote in the original code /// and thereby e.g. leave an empty line between two function definitions. - unsigned NewlinesBefore; + unsigned NewlinesBefore = 0; /// \brief Whether there is at least one unescaped newline before the \c /// Token. - bool HasUnescapedNewline; + bool HasUnescapedNewline = false; /// \brief The range of the whitespace immediately preceding the \c Token. SourceRange WhitespaceRange; /// \brief The offset just past the last '\n' in this token's leading /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. - unsigned LastNewlineOffset; + unsigned LastNewlineOffset = 0; /// \brief The width of the non-whitespace parts of the token (or its first /// line for multi-line tokens) in columns. /// We need this to correctly measure number of columns a token spans. - unsigned ColumnWidth; + unsigned ColumnWidth = 0; /// \brief Contains the width in columns of the last line of a multi-line /// token. 
- unsigned LastLineColumnWidth; + unsigned LastLineColumnWidth = 0; /// \brief Whether the token text contains newlines (escaped or not). - bool IsMultiline; + bool IsMultiline = false; /// \brief Indicates that this is the first token. - bool IsFirst; + bool IsFirst = false; /// \brief Whether there must be a line break before this token. /// /// This happens for example when a preprocessor directive ended directly /// before the token. - bool MustBreakBefore; - - /// \brief Returns actual token start location without leading escaped - /// newlines and whitespace. - /// - /// This can be different to Tok.getLocation(), which includes leading escaped - /// newlines. - SourceLocation getStartOfNonWhitespace() const { - return WhitespaceRange.getEnd(); - } + bool MustBreakBefore = false; /// \brief The raw text of the token. /// @@ -175,69 +160,74 @@ struct FormatToken { StringRef TokenText; /// \brief Set to \c true if this token is an unterminated literal. - bool IsUnterminatedLiteral; + bool IsUnterminatedLiteral = 0; /// \brief Contains the kind of block if this token is a brace. - BraceBlockKind BlockKind; + BraceBlockKind BlockKind = BK_Unknown; - TokenType Type; + TokenType Type = TT_Unknown; /// \brief The number of spaces that should be inserted before this token. - unsigned SpacesRequiredBefore; + unsigned SpacesRequiredBefore = 0; /// \brief \c true if it is allowed to break before this token. - bool CanBreakBefore; + bool CanBreakBefore = false; - bool ClosesTemplateDeclaration; + /// \brief \c true if this is the ">" of "template<..>". + bool ClosesTemplateDeclaration = false; /// \brief Number of parameters, if this is "(", "[" or "<". /// /// This is initialized to 1 as we don't need to distinguish functions with /// 0 parameters from functions with 1 parameter. Thus, we can simply count /// the number of commas. 
- unsigned ParameterCount; + unsigned ParameterCount = 0; /// \brief Number of parameters that are nested blocks, /// if this is "(", "[" or "<". - unsigned BlockParameterCount; + unsigned BlockParameterCount = 0; + + /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of + /// the surrounding bracket. + tok::TokenKind ParentBracket = tok::unknown; /// \brief A token can have a special role that can carry extra information /// about the token's formatting. std::unique_ptr<TokenRole> Role; /// \brief If this is an opening parenthesis, how are the parameters packed? - ParameterPackingKind PackingKind; + ParameterPackingKind PackingKind = PPK_Inconclusive; /// \brief The total length of the unwrapped line up to and including this /// token. - unsigned TotalLength; + unsigned TotalLength = 0; /// \brief The original 0-based column of this token, including expanded tabs. /// The configured TabWidth is used as tab width. - unsigned OriginalColumn; + unsigned OriginalColumn = 0; /// \brief The length of following tokens until the next natural split point, /// or the next token that can be broken. - unsigned UnbreakableTailLength; + unsigned UnbreakableTailLength = 0; // FIXME: Come up with a 'cleaner' concept. /// \brief The binding strength of a token. This is a combined value of /// operator precedence, parenthesis nesting, etc. - unsigned BindingStrength; + unsigned BindingStrength = 0; /// \brief The nesting level of this token, i.e. the number of surrounding (), /// [], {} or <>. - unsigned NestingLevel; + unsigned NestingLevel = 0; /// \brief Penalty for inserting a line break before this token. - unsigned SplitPenalty; + unsigned SplitPenalty = 0; /// \brief If this is the first ObjC selector name in an ObjC method /// definition or call, this contains the length of the longest name. /// /// This being set to 0 means that the selectors should not be colon-aligned, /// e.g. because several of them are block-type. 
- unsigned LongestObjCSelectorName; + unsigned LongestObjCSelectorName = 0; /// \brief Stores the number of required fake parentheses and the /// corresponding operator precedence. @@ -246,29 +236,47 @@ struct FormatToken { /// reverse order, i.e. inner fake parenthesis first. SmallVector<prec::Level, 4> FakeLParens; /// \brief Insert this many fake ) after this token for correct indentation. - unsigned FakeRParens; + unsigned FakeRParens = 0; /// \brief \c true if this token starts a binary expression, i.e. has at least /// one fake l_paren with a precedence greater than prec::Unknown. - bool StartsBinaryExpression; + bool StartsBinaryExpression = false; /// \brief \c true if this token ends a binary expression. - bool EndsBinaryExpression; + bool EndsBinaryExpression = false; /// \brief Is this is an operator (or "."/"->") in a sequence of operators /// with the same precedence, contains the 0-based operator index. - unsigned OperatorIndex; + unsigned OperatorIndex = 0; /// \brief Is this the last operator (or "."/"->") in a sequence of operators /// with the same precedence? - bool LastOperator; + bool LastOperator = false; /// \brief Is this token part of a \c DeclStmt defining multiple variables? /// /// Only set if \c Type == \c TT_StartOfName. - bool PartOfMultiVariableDeclStmt; + bool PartOfMultiVariableDeclStmt = false; + + /// \brief If this is a bracket, this points to the matching one. + FormatToken *MatchingParen = nullptr; + + /// \brief The previous token in the unwrapped line. + FormatToken *Previous = nullptr; - /// \brief Is this a foreach macro? - bool IsForEachMacro; + /// \brief The next token in the unwrapped line. + FormatToken *Next = nullptr; + + /// \brief If this token starts a block, this contains all the unwrapped lines + /// in it. + SmallVector<AnnotatedLine *, 1> Children; + + /// \brief Stores the formatting decision for the token once it was made. 
+ FormatDecision Decision = FD_Unformatted; + + /// \brief If \c true, this token has been fully formatted (indented and + /// potentially re-formatted inside), and we do not allow further formatting + /// changes. + bool Finalized = false; bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } bool is(TokenType TT) const { return Type == TT; } @@ -278,27 +286,10 @@ struct FormatToken { template <typename A, typename B> bool isOneOf(A K1, B K2) const { return is(K1) || is(K2); } - template <typename A, typename B, typename C> - bool isOneOf(A K1, B K2, C K3) const { - return is(K1) || is(K2) || is(K3); - } - template <typename A, typename B, typename C, typename D> - bool isOneOf(A K1, B K2, C K3, D K4) const { - return is(K1) || is(K2) || is(K3) || is(K4); + template <typename A, typename B, typename... Ts> + bool isOneOf(A K1, B K2, Ts... Ks) const { + return is(K1) || isOneOf(K2, Ks...); } - template <typename A, typename B, typename C, typename D, typename E> - bool isOneOf(A K1, B K2, C K3, D K4, E K5) const { - return is(K1) || is(K2) || is(K3) || is(K4) || is(K5); - } - template <typename T> - bool isOneOf(T K1, T K2, T K3, T K4, T K5, T K6, T K7 = tok::NUM_TOKENS, - T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS, - T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS, - T K12 = tok::NUM_TOKENS) const { - return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || - is(K8) || is(K9) || is(K10) || is(K11) || is(K12); - } - template <typename T> bool isNot(T Kind) const { return !is(Kind); } bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } @@ -336,7 +327,8 @@ struct FormatToken { /// \brief Returns \c true if this is a "." or "->" accessing a member. 
bool isMemberAccess() const { return isOneOf(tok::arrow, tok::period, tok::arrowstar) && - !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow); + !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, + TT_LambdaArrow); } bool isUnaryOperator() const { @@ -385,6 +377,15 @@ struct FormatToken { } } + /// \brief Returns actual token start location without leading escaped + /// newlines and whitespace. + /// + /// This can be different to Tok.getLocation(), which includes leading escaped + /// newlines. + SourceLocation getStartOfNonWhitespace() const { + return WhitespaceRange.getEnd(); + } + prec::Level getPrecedence() const { return getBinOpPrecedence(Tok.getKind(), true, true); } @@ -419,25 +420,10 @@ struct FormatToken { return MatchingParen && MatchingParen->opensBlockTypeList(Style); } - FormatToken *MatchingParen; - - FormatToken *Previous; - FormatToken *Next; - - SmallVector<AnnotatedLine *, 1> Children; - - /// \brief Stores the formatting decision for the token once it was made. - FormatDecision Decision; - - /// \brief If \c true, this token has been fully formatted (indented and - /// potentially re-formatted inside), and we do not allow further formatting - /// changes. - bool Finalized; - private: // Disallow copying. 
- FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION; - void operator=(const FormatToken &) LLVM_DELETED_FUNCTION; + FormatToken(const FormatToken &) = delete; + void operator=(const FormatToken &) = delete; }; class ContinuationIndenter; @@ -543,6 +529,7 @@ struct AdditionalKeywords { kw_finally = &IdentTable.get("finally"); kw_function = &IdentTable.get("function"); + kw_import = &IdentTable.get("import"); kw_var = &IdentTable.get("var"); kw_abstract = &IdentTable.get("abstract"); @@ -555,24 +542,33 @@ struct AdditionalKeywords { kw_package = &IdentTable.get("package"); kw_synchronized = &IdentTable.get("synchronized"); kw_throws = &IdentTable.get("throws"); + kw___except = &IdentTable.get("__except"); + + kw_mark = &IdentTable.get("mark"); kw_option = &IdentTable.get("option"); kw_optional = &IdentTable.get("optional"); kw_repeated = &IdentTable.get("repeated"); kw_required = &IdentTable.get("required"); kw_returns = &IdentTable.get("returns"); + + kw_signals = &IdentTable.get("signals"); + kw_slots = &IdentTable.get("slots"); + kw_qslots = &IdentTable.get("Q_SLOTS"); } - // ObjC context sensitive keywords. + // Context sensitive keywords. IdentifierInfo *kw_in; IdentifierInfo *kw_CF_ENUM; IdentifierInfo *kw_CF_OPTIONS; IdentifierInfo *kw_NS_ENUM; IdentifierInfo *kw_NS_OPTIONS; + IdentifierInfo *kw___except; // JavaScript keywords. IdentifierInfo *kw_finally; IdentifierInfo *kw_function; + IdentifierInfo *kw_import; IdentifierInfo *kw_var; // Java keywords. @@ -587,12 +583,20 @@ struct AdditionalKeywords { IdentifierInfo *kw_synchronized; IdentifierInfo *kw_throws; + // Pragma keywords. + IdentifierInfo *kw_mark; + // Proto keywords. IdentifierInfo *kw_option; IdentifierInfo *kw_optional; IdentifierInfo *kw_repeated; IdentifierInfo *kw_required; IdentifierInfo *kw_returns; + + // QT keywords. 
+ IdentifierInfo *kw_signals; + IdentifierInfo *kw_slots; + IdentifierInfo *kw_qslots; }; } // namespace format diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 4ba3f91969776..98f5709b90621 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -15,6 +15,7 @@ #include "TokenAnnotator.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "format-token-annotator" @@ -43,8 +44,14 @@ private: bool parseAngle() { if (!CurrentToken) return false; - ScopedContextCreator ContextCreator(*this, tok::less, 10); FormatToken *Left = CurrentToken->Previous; + Left->ParentBracket = Contexts.back().ContextKind; + ScopedContextCreator ContextCreator(*this, tok::less, 10); + + // If this angle is in the context of an expression, we need to be more + // hesitant to detect it as opening template parameters. + bool InExprContext = Contexts.back().IsExpression; + Contexts.back().IsExpression = false; // If there's a template keyword before the opening angle bracket, this is a // template parameter, not an argument. @@ -68,8 +75,8 @@ private: next(); continue; } - if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, - tok::colon, tok::question)) + if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || + (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext)) return false; // If a && or || is found and interpreted as a binary operator, this set // of angles is likely part of something like "a < b && c > d". If the @@ -92,6 +99,8 @@ private: bool parseParens(bool LookForDecls = false) { if (!CurrentToken) return false; + FormatToken *Left = CurrentToken->Previous; + Left->ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); // FIXME: This is a bit of a hack. Do better. 
@@ -99,7 +108,6 @@ private: Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; bool StartsObjCMethodExpr = false; - FormatToken *Left = CurrentToken->Previous; if (CurrentToken->is(tok::caret)) { // (^ can start a block type. Left->Type = TT_ObjCBlockLParen; @@ -117,22 +125,22 @@ private: Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; - } else if (Line.InPPDirective && - (!Left->Previous || - !Left->Previous->isOneOf(tok::identifier, - TT_OverloadedOperator))) { - Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(tok::r_square) && Left->Previous->MatchingParen && Left->Previous->MatchingParen->is(TT_LambdaLSquare)) { // This is a parameter list of a lambda expression. Contexts.back().IsExpression = false; + } else if (Line.InPPDirective && + (!Left->Previous || + !Left->Previous->isOneOf(tok::identifier, + TT_OverloadedOperator))) { + Contexts.back().IsExpression = true; } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { Left->Type = TT_AttributeParen; - } else if (Left->Previous && Left->Previous->IsForEachMacro) { + } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. Contexts.back().IsForEachMacro = true; Contexts.back().IsExpression = false; @@ -149,6 +157,8 @@ private: bool MightBeFunctionType = CurrentToken->is(tok::star); bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; + bool MightBeObjCForRangeLoop = + Left->Previous && Left->Previous->is(tok::kw_for); while (CurrentToken) { // LookForDecls is set when "if (" has been seen. 
Check for // 'identifier' '*' 'identifier' followed by not '=' -- this @@ -210,7 +220,8 @@ private: } if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) return false; - else if (CurrentToken->is(tok::l_brace)) + + if (CurrentToken->is(tok::l_brace)) Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen if (CurrentToken->is(tok::comma) && CurrentToken->Next && !CurrentToken->Next->HasUnescapedNewline && @@ -219,6 +230,15 @@ private: if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || CurrentToken->isSimpleTypeSpecifier()) Contexts.back().IsExpression = false; + if (CurrentToken->isOneOf(tok::semi, tok::colon)) + MightBeObjCForRangeLoop = false; + if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) + CurrentToken->Type = TT_ObjCForIn; + // When we discover a 'new', we set CanBeExpression to 'false' in order to + // parse the type correctly. Reset that after a comma. + if (CurrentToken->is(tok::comma)) + Contexts.back().CanBeExpression = true; + FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; @@ -237,6 +257,7 @@ private: // ')' or ']'), it could be the start of an Objective-C method // expression, or it could the the start of an Objective-C array literal. 
FormatToken *Left = CurrentToken->Previous; + Left->ParentBracket = Contexts.back().ContextKind; FormatToken *Parent = Left->getPreviousNonComment(); bool StartsObjCMethodExpr = Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && @@ -316,6 +337,7 @@ private: bool parseBrace() { if (CurrentToken) { FormatToken *Left = CurrentToken->Previous; + Left->ParentBracket = Contexts.back().ContextKind; if (Contexts.back().CaretFound) Left->Type = TT_ObjCBlockLBrace; @@ -342,7 +364,8 @@ private: Style.Language == FormatStyle::LK_Proto) && Previous->is(tok::identifier)) Previous->Type = TT_SelectorName; - if (CurrentToken->is(tok::colon)) + if (CurrentToken->is(tok::colon) || + Style.Language == FormatStyle::LK_JavaScript) Left->Type = TT_DictLiteral; } if (!consumeToken()) @@ -408,10 +431,18 @@ private: if (!Tok->Previous) return false; // Colons from ?: are handled in parseConditional(). - if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 && - Line.First->isNot(tok::kw_case)) { - Tok->Type = TT_CtorInitializerColon; - } else if (Contexts.back().ColonIsDictLiteral) { + if (Style.Language == FormatStyle::LK_JavaScript) { + if (Contexts.back().ColonIsForRangeExpr || // colon in for loop + (Contexts.size() == 1 && // switch/case labels + !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || + Contexts.back().ContextKind == tok::l_paren || // function params + Contexts.back().ContextKind == tok::l_square || // array type + Line.MustBeDeclaration) { // method/property declaration + Tok->Type = TT_JsTypeColon; + break; + } + } + if (Contexts.back().ColonIsDictLiteral) { Tok->Type = TT_DictLiteral; } else if (Contexts.back().ColonIsObjCMethodExpr || Line.First->is(TT_ObjCMethodSpecifier)) { @@ -429,7 +460,10 @@ private: Tok->Type = TT_BitFieldColon; } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { - Tok->Type = TT_InheritanceColon; + if (Tok->Previous->is(tok::r_paren)) + Tok->Type = TT_CtorInitializerColon; + else + 
Tok->Type = TT_InheritanceColon; } else if (Tok->Previous->is(tok::identifier) && Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::comma)) { // This handles a special macro in ObjC code where selectors including @@ -471,13 +505,15 @@ private: return false; break; case tok::less: - if ((!Tok->Previous || + if (!NonTemplateLess.count(Tok) && + (!Tok->Previous || (!Tok->Previous->Tok.isLiteral() && !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) && parseAngle()) { Tok->Type = TT_TemplateOpener; } else { Tok->Type = TT_BinaryOperator; + NonTemplateLess.insert(Tok); CurrentToken = Tok; next(); } @@ -509,21 +545,34 @@ private: } break; case tok::question: + if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && + Tok->Next->isOneOf(tok::semi, tok::colon, tok::r_paren, + tok::r_brace)) { + // Question marks before semicolons, colons, etc. indicate optional + // types (fields, parameters), e.g. + // function(x?: string, y?) {...} + // class X { y?; } + Tok->Type = TT_JsTypeOptionalQuestion; + break; + } + // Declarations cannot be conditional expressions, this can only be part + // of a type declaration. 
+ if (Line.MustBeDeclaration && + Style.Language == FormatStyle::LK_JavaScript) + break; parseConditional(); break; case tok::kw_template: parseTemplateDeclaration(); break; - case tok::identifier: - if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in) && - Tok->Previous->isNot(tok::colon)) - Tok->Type = TT_ObjCForIn; - break; case tok::comma: - if (Contexts.back().FirstStartOfName) - Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; if (Contexts.back().InCtorInitializer) Tok->Type = TT_CtorInitializerComma; + else if (Contexts.back().FirstStartOfName && + (Contexts.size() == 1 || Line.First->is(tok::kw_for))) { + Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; + Line.IsMultiVariableDeclStmt = true; + } if (Contexts.back().IsForEachMacro) Contexts.back().IsExpression = true; break; @@ -557,11 +606,14 @@ private: void parsePragma() { next(); // Consume "pragma". - if (CurrentToken && CurrentToken->TokenText == "mark") { + if (CurrentToken && + CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { + bool IsMark = CurrentToken->is(Keywords.kw_mark); next(); // Consume "mark". next(); // Consume first token (so we fix leading whitespace). 
while (CurrentToken) { - CurrentToken->Type = TT_ImplicitStringLiteral; + if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) + CurrentToken->Type = TT_ImplicitStringLiteral; next(); } } @@ -582,6 +634,7 @@ private: return Type; switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_include: + case tok::pp_include_next: case tok::pp_import: next(); parseIncludeDirective(); @@ -609,9 +662,9 @@ private: public: LineType parseLine() { - if (CurrentToken->is(tok::hash)) { + NonTemplateLess.clear(); + if (CurrentToken->is(tok::hash)) return parsePreprocessorDirective(); - } // Directly allow to 'import <string-literal>' to support protocol buffer // definitions (code.google.com/p/protobuf) or missing "#" (either way we @@ -635,6 +688,15 @@ public: return LT_ImportStatement; } + // In .proto files, top-level options are very similar to import statements + // and should not be line-wrapped. + if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && + CurrentToken->is(Keywords.kw_option)) { + next(); + if (CurrentToken && CurrentToken->is(tok::identifier)) + return LT_ImportStatement; + } + bool KeywordVirtualFound = false; bool ImportStatement = false; while (CurrentToken) { @@ -678,11 +740,13 @@ private: // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
- if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_FunctionLBrace, - TT_ImplicitStringLiteral, TT_RegexLiteral, + if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, + TT_FunctionLBrace, TT_ImplicitStringLiteral, + TT_InlineASMBrace, TT_RegexLiteral, TT_TrailingReturnArrow)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); + CurrentToken->MatchingParen = nullptr; CurrentToken->FakeLParens.clear(); CurrentToken->FakeRParens = 0; } @@ -705,27 +769,22 @@ private: Context(tok::TokenKind ContextKind, unsigned BindingStrength, bool IsExpression) : ContextKind(ContextKind), BindingStrength(BindingStrength), - LongestObjCSelectorName(0), ColonIsForRangeExpr(false), - ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false), - FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr), - IsExpression(IsExpression), CanBeExpression(true), - InTemplateArgument(false), InCtorInitializer(false), - CaretFound(false), IsForEachMacro(false) {} + IsExpression(IsExpression) {} tok::TokenKind ContextKind; unsigned BindingStrength; - unsigned LongestObjCSelectorName; - bool ColonIsForRangeExpr; - bool ColonIsDictLiteral; - bool ColonIsObjCMethodExpr; - FormatToken *FirstObjCSelectorName; - FormatToken *FirstStartOfName; bool IsExpression; - bool CanBeExpression; - bool InTemplateArgument; - bool InCtorInitializer; - bool CaretFound; - bool IsForEachMacro; + unsigned LongestObjCSelectorName = 0; + bool ColonIsForRangeExpr = false; + bool ColonIsDictLiteral = false; + bool ColonIsObjCMethodExpr = false; + FormatToken *FirstObjCSelectorName = nullptr; + FormatToken *FirstStartOfName = nullptr; + bool CanBeExpression = true; + bool InTemplateArgument = false; + bool InCtorInitializer = false; + bool CaretFound = false; + bool IsForEachMacro = false; }; /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -746,23 +805,29 @@ private: void modifyContext(const FormatToken &Current) { if (Current.getPrecedence() == prec::Assignment && - 
!Line.First->isOneOf(tok::kw_template, tok::kw_using, - TT_UnaryOperator) && + !Line.First->isOneOf(tok::kw_template, tok::kw_using) && (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { Contexts.back().IsExpression = true; - for (FormatToken *Previous = Current.Previous; - Previous && !Previous->isOneOf(tok::comma, tok::semi); - Previous = Previous->Previous) { - if (Previous->isOneOf(tok::r_square, tok::r_paren)) { - Previous = Previous->MatchingParen; - if (!Previous) + if (!Line.First->is(TT_UnaryOperator)) { + for (FormatToken *Previous = Current.Previous; + Previous && !Previous->isOneOf(tok::comma, tok::semi); + Previous = Previous->Previous) { + if (Previous->isOneOf(tok::r_square, tok::r_paren)) { + Previous = Previous->MatchingParen; + if (!Previous) + break; + } + if (Previous->opensScope()) break; + if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && + Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && + Previous->Previous && Previous->Previous->isNot(tok::equal)) + Previous->Type = TT_PointerOrReference; } - if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && - Previous->isOneOf(tok::star, tok::amp) && Previous->Previous && - Previous->Previous->isNot(tok::equal)) - Previous->Type = TT_PointerOrReference; } + } else if (Current.is(tok::lessless) && + (!Current.Previous || !Current.Previous->is(tok::kw_operator))) { + Contexts.back().IsExpression = true; } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { Contexts.back().IsExpression = true; } else if (Current.is(TT_TrailingReturnArrow)) { @@ -833,30 +898,56 @@ private: } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { Current.Type = TT_UnaryOperator; } else if (Current.is(tok::question)) { - Current.Type = TT_ConditionalExpr; + if (Style.Language == FormatStyle::LK_JavaScript && + Line.MustBeDeclaration) { + // In JavaScript, `interface X { foo?(): bar; }` is an optional method + // on the interface, not a ternary expression. 
+ Current.Type = TT_JsTypeOptionalQuestion; + } else { + Current.Type = TT_ConditionalExpr; + } } else if (Current.isBinaryOperator() && (!Current.Previous || Current.Previous->isNot(tok::l_square))) { Current.Type = TT_BinaryOperator; } else if (Current.is(tok::comment)) { - if (Current.TokenText.startswith("//")) + if (Current.TokenText.startswith("/*")) { + if (Current.TokenText.endswith("*/")) + Current.Type = TT_BlockComment; + else + // The lexer has for some reason determined a comment here. But we + // cannot really handle it, if it isn't properly terminated. + Current.Tok.setKind(tok::unknown); + } else { Current.Type = TT_LineComment; - else - Current.Type = TT_BlockComment; + } } else if (Current.is(tok::r_paren)) { if (rParenEndsCast(Current)) Current.Type = TT_CastRParen; + if (Current.MatchingParen && Current.Next && + !Current.Next->isBinaryOperator() && + !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace)) + if (FormatToken *BeforeParen = Current.MatchingParen->Previous) + if (BeforeParen->is(tok::identifier) && + BeforeParen->TokenText == BeforeParen->TokenText.upper() && + (!BeforeParen->Previous || + BeforeParen->Previous->ClosesTemplateDeclaration)) + Current.Type = TT_FunctionAnnotationRParen; } else if (Current.is(tok::at) && Current.Next) { - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; + if (Current.Next->isStringLiteral()) { + Current.Type = TT_ObjCStringLiteral; + } else { + switch (Current.Next->Tok.getObjCKeywordID()) { + case tok::objc_interface: + case tok::objc_implementation: + case tok::objc_protocol: + Current.Type = TT_ObjCDecl; + break; + case tok::objc_property: + Current.Type = TT_ObjCProperty; + break; + default: + break; + } } } else if (Current.is(tok::period)) { FormatToken *PreviousNoComment = 
Current.getPreviousNonComment(); @@ -875,7 +966,9 @@ private: // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. Current.Type = TT_TrailingAnnotation; - } else if (Style.Language == FormatStyle::LK_Java && Current.Previous) { + } else if ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + Current.Previous) { if (Current.Previous->is(tok::at) && Current.isNot(Keywords.kw_interface)) { const FormatToken &AtToken = *Current.Previous; @@ -902,7 +995,7 @@ private: return false; if (Tok.Previous->is(TT_LeadingJavaAnnotation)) - return false; + return false; // Skip "const" as it does not have an influence on whether this is a name. FormatToken *PreviousNotConst = Tok.Previous; @@ -964,8 +1057,7 @@ private: bool IsSizeOfOrAlignOf = LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && - ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) || - (Tok.Next && Tok.Next->isBinaryOperator()))) + (Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression)) IsCast = true; else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && (Tok.Next->Tok.isLiteral() || @@ -995,7 +1087,8 @@ private: } for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { - if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) { + if (!Prev || + !Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) { IsCast = false; break; } @@ -1032,7 +1125,7 @@ private: if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) return TT_PointerOrReference; - if (NextToken->isOneOf(tok::kw_operator, tok::comma)) + if (NextToken->isOneOf(tok::kw_operator, tok::comma, tok::semi)) return TT_PointerOrReference; if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && @@ -1108,10 +1201,16 @@ private: FormatToken *CurrentToken; bool AutoFound; const AdditionalKeywords 
&Keywords; + + // Set of "<" tokens that do not open a template parameter list. If parseAngle + // determines that a specific token can't be a template opener, it will make + // same decision irrespective of the decisions for tokens leading up to it. + // Store this information to prevent this from causing exponential runtime. + llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess; }; -static int PrecedenceUnaryOperator = prec::PointerToMember + 1; -static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; +static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; +static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; /// \brief Parses binary expressions by inserting fake parenthesis based on /// operator precedence. @@ -1361,12 +1460,13 @@ static bool isFunctionDeclarationName(const FormatToken &Current) { assert(Next->is(tok::l_paren)); if (Next->Next == Next->MatchingParen) return true; - for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen; + for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || Tok->isOneOf(TT_PointerOrReference, TT_StartOfName)) return true; - if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral()) + if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || + Tok->Tok.isLiteral()) return false; } return false; @@ -1502,7 +1602,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::comma) && Left.NestingLevel == 0) return 3; } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Right.is(Keywords.kw_function)) + if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) return 100; } @@ -1512,6 +1612,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(tok::l_square)) { if (Style.Language == FormatStyle::LK_Proto) return 1; + // Slightly prefer formatting local lambda definitions 
like functions. + if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) + return 50; if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare)) return 500; } @@ -1521,11 +1624,15 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) return 3; if (Left.is(TT_StartOfName)) - return 20; + return 110; if (InFunctionDecl && Right.NestingLevel == 0) return Style.PenaltyReturnTypeOnItsOwnLine; return 200; } + if (Right.is(TT_PointerOrReference)) + return 190; + if (Right.is(TT_TrailingReturnArrow)) + return 110; if (Left.is(tok::equal) && Right.is(tok::l_brace)) return 150; if (Left.is(TT_CastRParen)) @@ -1575,6 +1682,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) return 100; + if (Left.is(tok::l_paren) && Left.Previous && Left.Previous->is(tok::kw_if)) + return 1000; if (Left.is(tok::equal) && InFunctionDecl) return 110; if (Right.is(tok::r_brace)) @@ -1591,7 +1700,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 50; if (Right.is(tok::lessless)) { - if (Left.is(tok::string_literal)) { + if (Left.is(tok::string_literal) && + (!Right.LastOperator || Right.OperatorIndex != 1)) { StringRef Content = Left.TokenText; if (Content.startswith("\"")) Content = Content.drop_front(1); @@ -1607,7 +1717,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); - + if (Level != prec::Unknown) + return Level; + Level = Right.getPrecedence(); if (Level != prec::Unknown) return Level; @@ -1636,7 +1748,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && - (Left.isOneOf(tok::kw_template, tok::r_paren) || + (Left.is(tok::kw_template) || (Line.Type == LT_ObjCDecl && 
Style.ObjCSpaceBeforeProtocolList))) return true; if (Left.isOneOf(tok::exclaim, tok::tilde)) @@ -1655,17 +1767,27 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::l_square) && Right.is(tok::amp)) return false; if (Right.is(TT_PointerOrReference)) - return Left.Tok.isLiteral() || - (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && - Style.PointerAlignment != FormatStyle::PAS_Left); + return !(Left.is(tok::r_paren) && Left.MatchingParen && + (Left.MatchingParen->is(TT_OverloadedOperatorLParen) || + (Left.MatchingParen->Previous && + Left.MatchingParen->Previous->is( + TT_FunctionDeclarationName)))) && + (Left.Tok.isLiteral() || + (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && + (Style.PointerAlignment != FormatStyle::PAS_Left || + Line.IsMultiVariableDeclStmt))); if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || - Style.PointerAlignment != FormatStyle::PAS_Right)) + (Style.PointerAlignment != FormatStyle::PAS_Right && + !Line.IsMultiVariableDeclStmt))) return true; if (Left.is(TT_PointerOrReference)) return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || - (!Right.isOneOf(TT_PointerOrReference, tok::l_paren) && - Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous && + (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, + tok::l_paren) && + (Style.PointerAlignment != FormatStyle::PAS_Right && + !Line.IsMultiVariableDeclStmt) && + Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; @@ -1700,13 +1822,12 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, - tok::kw_switch, tok::kw_case) || - (Left.isOneOf(tok::kw_try, tok::kw_catch, tok::kw_new, - tok::kw_delete) && 
- (!Left.Previous || Left.Previous->isNot(tok::period))) || - Left.IsForEachMacro)) || + tok::kw_switch, tok::kw_case, TT_ForEachMacro) || + (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, + tok::kw_new, tok::kw_delete) && + (!Left.Previous || Left.Previous->isNot(tok::period))))) || (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && - (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) && + (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || Left.is(tok::r_paren)) && Line.Type != LT_PreprocessorDirective); } if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) @@ -1748,6 +1869,20 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(Keywords.kw_var)) return true; + if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) + return false; + if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && + Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) + return false; + if (Left.is(tok::ellipsis)) + return false; + if (Left.is(TT_TemplateCloser) && + !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, + Keywords.kw_implements, Keywords.kw_extends)) + // Type assertions ('<type>expr') are not followed by whitespace. Other + // locations that should have whitespace following are identified by the + // above set of follower tokens. 
+ return false; } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; @@ -1789,16 +1924,29 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Right.is(tok::coloncolon); if (Right.is(TT_OverloadedOperatorLParen)) return false; - if (Right.is(tok::colon)) - return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && - Right.getNextNonComment() && Right.isNot(TT_ObjCMethodExpr) && - !Left.is(tok::question) && - !(Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) && - (Right.isNot(TT_DictLiteral) || Style.SpacesInContainerLiterals); + if (Right.is(tok::colon)) { + if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || + !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) + return false; + if (Right.is(TT_ObjCMethodExpr)) + return false; + if (Left.is(tok::question)) + return false; + if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) + return false; + if (Right.is(TT_DictLiteral)) + return Style.SpacesInContainerLiterals; + return true; + } if (Left.is(TT_UnaryOperator)) return Right.is(TT_BinaryOperator); + + // If the next token is a binary operator or a selector name, we have + // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. 
if (Left.is(TT_CastRParen)) - return Style.SpaceAfterCStyleCast || Right.is(TT_BinaryOperator); + return Style.SpaceAfterCStyleCast || + Right.isOneOf(TT_BinaryOperator, TT_SelectorName); + if (Left.is(tok::greater) && Right.is(tok::greater)) { return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); @@ -1819,7 +1967,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) return true; - if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren)) + if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && + Right.isNot(TT_FunctionTypeLParen)) return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) @@ -1850,9 +1999,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, // intention is to insert a line break after it in order to make shuffling // around entries easier. 
const FormatToken *BeforeClosingBrace = nullptr; - if (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Left.MatchingParen) + if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) && + Left.BlockKind != BK_Block && Left.MatchingParen) BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.is(tok::r_brace) && Right.BlockKind != BK_Block) + else if (Right.MatchingParen && + Right.MatchingParen->isOneOf(tok::l_brace, + TT_ArrayInitializerLSquare)) BeforeClosingBrace = &Left; if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || BeforeClosingBrace->isTrailingComment())) @@ -1862,8 +2014,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return Left.BlockKind != BK_BracedInit && Left.isNot(TT_CtorInitializerColon) && (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); - if (Right.Previous->isTrailingComment() || - (Right.isStringLiteral() && Right.Previous->isStringLiteral())) + if (Left.isTrailingComment()) + return true; + if (Left.isStringLiteral() && + (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral))) return true; if (Right.Previous->IsUnterminatedLiteral) return true; @@ -1889,6 +2043,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Style.Language == FormatStyle::LK_Proto) // Don't put enums onto single lines in protocol buffers. return true; + if (Right.is(TT_InlineASMBrace)) + return Right.HasUnescapedNewline; if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) && Left.is(tok::l_brace) && !Left.Children.empty()) // Support AllowShortFunctionsOnASingleLine for JavaScript. 
@@ -1903,8 +2059,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) return true; - if (Right.is(tok::lessless) && Left.is(tok::identifier) && - Left.TokenText == "endl") + + if ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + Left.is(TT_LeadingJavaAnnotation) && + Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && + Line.Last->is(tok::l_brace)) return true; if (Style.Language == FormatStyle::LK_JavaScript) { @@ -1913,13 +2073,15 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Left.Previous->is(tok::char_constant)) return true; if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && - Left.NestingLevel == 0) + Left.NestingLevel == 0 && Left.Previous && + Left.Previous->is(tok::equal) && + Line.First->isOneOf(tok::identifier, Keywords.kw_import, + tok::kw_export) && + // kw_var is a pseudo-token that's a tok::identifier, so matches above. + !Line.First->is(Keywords.kw_var)) + // Enum style object literal. 
return true; } else if (Style.Language == FormatStyle::LK_Java) { - if (Left.is(TT_LeadingJavaAnnotation) && - Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && - Line.Last->is(tok::l_brace)) - return true; if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && Right.Next->is(tok::string_literal)) return true; @@ -1947,9 +2109,15 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) return !Right.is(tok::l_paren); + if (Right.is(TT_PointerOrReference)) + return Line.IsMultiVariableDeclStmt || + (Style.PointerAlignment == FormatStyle::PAS_Right && + (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || Right.is(tok::kw_operator)) return true; + if (Left.is(TT_PointerOrReference)) + return false; if (Right.isTrailingComment()) // We rely on MustBreakBefore being set correctly here as we should not // change the "binding" behavior of a comment. @@ -1970,8 +2138,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))) return true; - if (Right.is(TT_SelectorName)) - return true; + if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && + Right.Next->is(TT_ObjCMethodExpr))) + return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. 
if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) return true; if (Left.ClosesTemplateDeclaration) @@ -1983,17 +2152,16 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; if (Right.is(TT_RangeBasedForLoopColon)) return false; - if (Left.isOneOf(TT_PointerOrReference, TT_TemplateCloser, - TT_UnaryOperator) || + if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || Left.is(tok::kw_operator)) return false; - if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) + if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && + Line.Type == LT_VirtualFunctionDecl) return false; if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) return false; if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen) || - Left.Previous->is(tok::kw_if))) + (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) return false; if (Right.is(TT_ImplicitStringLiteral)) return false; @@ -2027,8 +2195,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializersBeforeComma) return true; - if (Left.is(tok::greater) && Right.is(tok::greater) && - Left.isNot(TT_TemplateCloser)) + if ((Left.is(tok::greater) && Right.is(tok::greater)) || + (Left.is(tok::less) && Right.is(tok::less))) return false; if (Right.is(TT_BinaryOperator) && Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && @@ -2046,8 +2214,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct) || - Right.isMemberAccess() || Right.is(TT_TrailingReturnArrow) || - Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) || + Right.isMemberAccess() || + Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, + tok::colon, tok::l_square, tok::at) || (Left.is(tok::r_paren) && 
Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index ff8e32a56afc1..a948cdb1c4194 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -42,8 +42,8 @@ public: : First(Line.Tokens.front().Tok), Level(Line.Level), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), - Affected(false), LeadingEmptyLinesAffected(false), - ChildrenAffected(false) { + IsMultiVariableDeclStmt(false), Affected(false), + LeadingEmptyLinesAffected(false), ChildrenAffected(false) { assert(!Line.Tokens.empty()); // Calculate Next and Previous for all tokens. Note that we must overwrite @@ -59,11 +59,8 @@ public: I->Tok->Previous = Current; Current = Current->Next; Current->Children.clear(); - for (SmallVectorImpl<UnwrappedLine>::const_iterator - I = Node.Children.begin(), - E = Node.Children.end(); - I != E; ++I) { - Children.push_back(new AnnotatedLine(*I)); + for (const auto& Child : Node.Children) { + Children.push_back(new AnnotatedLine(Child)); Current->Children.push_back(Children.back()); } } @@ -75,6 +72,12 @@ public: for (unsigned i = 0, e = Children.size(); i != e; ++i) { delete Children[i]; } + FormatToken *Current = First; + while (Current) { + Current->Children.clear(); + Current->Role.reset(); + Current = Current->Next; + } } FormatToken *First; @@ -87,6 +90,7 @@ public: bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; + bool IsMultiVariableDeclStmt; /// \c True if this line should be formatted, i.e. intersects directly or /// indirectly with one of the input ranges. @@ -101,8 +105,8 @@ public: private: // Disallow copying. 
- AnnotatedLine(const AnnotatedLine &) LLVM_DELETED_FUNCTION; - void operator=(const AnnotatedLine &) LLVM_DELETED_FUNCTION; + AnnotatedLine(const AnnotatedLine &) = delete; + void operator=(const AnnotatedLine &) = delete; }; /// \brief Determines extra information about the tokens comprising an diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index ca66e7351641c..cbf8c6c922118 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -25,19 +25,152 @@ bool startsExternCBlock(const AnnotatedLine &Line) { NextNext && NextNext->is(tok::l_brace); } +/// \brief Tracks the indent level of \c AnnotatedLines across levels. +/// +/// \c nextLine must be called for each \c AnnotatedLine, after which \c +/// getIndent() will return the indent for the last line \c nextLine was called +/// with. +/// If the line is not formatted (and thus the indent does not change), calling +/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause +/// subsequent lines on the same level to be indented at the same level as the +/// given line. +class LevelIndentTracker { +public: + LevelIndentTracker(const FormatStyle &Style, + const AdditionalKeywords &Keywords, unsigned StartLevel, + int AdditionalIndent) + : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) { + for (unsigned i = 0; i != StartLevel; ++i) + IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + } + + /// \brief Returns the indent for the current line. + unsigned getIndent() const { return Indent; } + + /// \brief Update the indent state given that \p Line is going to be formatted + /// next. 
+ void nextLine(const AnnotatedLine &Line) { + Offset = getIndentOffset(*Line.First); + if (Line.InPPDirective) { + Indent = Line.Level * Style.IndentWidth + AdditionalIndent; + } else { + while (IndentForLevel.size() <= Line.Level) + IndentForLevel.push_back(-1); + IndentForLevel.resize(Line.Level + 1); + Indent = getIndent(IndentForLevel, Line.Level); + } + if (static_cast<int>(Indent) + Offset >= 0) + Indent += Offset; + } + + /// \brief Update the level indent to adapt to the given \p Line. + /// + /// When a line is not formatted, we move the subsequent lines on the same + /// level to the same indent. + /// Note that \c nextLine must have been called before this method. + void adjustToUnmodifiedLine(const AnnotatedLine &Line) { + unsigned LevelIndent = Line.First->OriginalColumn; + if (static_cast<int>(LevelIndent) - Offset >= 0) + LevelIndent -= Offset; + if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) && + !Line.InPPDirective) + IndentForLevel[Line.Level] = LevelIndent; + } + +private: + /// \brief Get the offset of the line relatively to the level. + /// + /// For example, 'public:' labels in classes are offset by 1 or 2 + /// characters to the left from their level. + int getIndentOffset(const FormatToken &RootToken) { + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) + return 0; + if (RootToken.isAccessSpecifier(false) || + RootToken.isObjCAccessSpecifier() || + (RootToken.is(Keywords.kw_signals) && RootToken.Next && + RootToken.Next->is(tok::colon))) + return Style.AccessModifierOffset; + return 0; + } + + /// \brief Get the indent of \p Level from \p IndentForLevel. + /// + /// \p IndentForLevel must contain the indent for the level \c l + /// at \p IndentForLevel[l], or a value < 0 if the indent for + /// that level is unknown. 
+ unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { + if (IndentForLevel[Level] != -1) + return IndentForLevel[Level]; + if (Level == 0) + return 0; + return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; + } + + const FormatStyle &Style; + const AdditionalKeywords &Keywords; + const unsigned AdditionalIndent; + + /// \brief The indent in characters for each level. + std::vector<int> IndentForLevel; + + /// \brief Offset of the current line relative to the indent level. + /// + /// For example, the 'public' keywords is often indented with a negative + /// offset. + int Offset = 0; + + /// \brief The current line's indent. + unsigned Indent = 0; +}; + class LineJoiner { public: - LineJoiner(const FormatStyle &Style) : Style(Style) {} + LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, + const SmallVectorImpl<AnnotatedLine *> &Lines) + : Style(Style), Keywords(Keywords), End(Lines.end()), + Next(Lines.begin()) {} + + /// \brief Returns the next line, merging multiple lines into one if possible. + const AnnotatedLine *getNextMergedLine(bool DryRun, + LevelIndentTracker &IndentTracker) { + if (Next == End) + return nullptr; + const AnnotatedLine *Current = *Next; + IndentTracker.nextLine(*Current); + unsigned MergedLines = + tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End); + if (MergedLines > 0 && Style.ColumnLimit == 0) + // Disallow line merging if there is a break at the start of one of the + // input lines. + for (unsigned i = 0; i < MergedLines; ++i) + if (Next[i + 1]->First->NewlinesBefore > 0) + MergedLines = 0; + if (!DryRun) + for (unsigned i = 0; i < MergedLines; ++i) + join(*Next[i], *Next[i + 1]); + Next = Next + MergedLines + 1; + return Current; + } +private: /// \brief Calculates how many lines can be merged into 1 starting at \p I. 
unsigned tryFitMultipleLinesInOne(unsigned Indent, SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E) { + // Can't join the last line with anything. + if (I + 1 == E) + return 0; // We can never merge stuff if there are trailing line comments. const AnnotatedLine *TheLine = *I; if (TheLine->Last->is(TT_LineComment)) return 0; + if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) + return 0; + if (TheLine->InPPDirective && + (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) + return 0; if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) return 0; @@ -50,9 +183,6 @@ public: ? 0 : Limit - TheLine->Last->TotalLength; - if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) - return 0; - // FIXME: TheLine->Level != 0 might or might not be the right check to do. // If necessary, change to something smarter. bool MergeShortFunctions = @@ -113,15 +243,12 @@ public: return 0; } -private: unsigned tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { if (Limit == 0) return 0; - if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) - return 0; if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) return 0; if (1 + I[1]->Last->TotalLength > Limit) @@ -147,8 +274,8 @@ private: return 0; if (1 + I[1]->Last->TotalLength > Limit) return 0; - if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, - tok::kw_while, TT_LineComment)) + if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, + TT_LineComment)) return 0; // Only inline simple if's (no nested if or else). 
if (I + 2 != E && Line.First->is(tok::kw_if) && @@ -157,9 +284,10 @@ private: return 1; } - unsigned tryMergeShortCaseLabels( - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { + unsigned + tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I, + SmallVectorImpl<AnnotatedLine *>::const_iterator E, + unsigned Limit) { if (Limit == 0 || I + 1 == E || I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) return 0; @@ -191,16 +319,21 @@ private: AnnotatedLine &Line = **I; // Don't merge ObjC @ keywords and methods. + // FIXME: If an option to allow short exception handling clauses on a single + // line is added, change this to not return for @try and friends. if (Style.Language != FormatStyle::LK_Java && Line.First->isOneOf(tok::at, tok::minus, tok::plus)) return 0; // Check that the current line allows merging. This depends on whether we // are in a control flow statements as well as several style flags. - if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) + if (Line.First->isOneOf(tok::kw_else, tok::kw_case) || + (Line.First->Next && Line.First->Next->is(tok::kw_else))) return 0; if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, - tok::kw_catch, tok::kw_for, tok::r_brace)) { + tok::kw___try, tok::kw_catch, tok::kw___finally, + tok::kw_for, tok::r_brace) || + Line.First->is(Keywords.kw___except)) { if (!Style.AllowShortBlocksOnASingleLine) return 0; if (!Style.AllowShortIfStatementsOnASingleLine && @@ -211,7 +344,11 @@ private: return 0; // FIXME: Consider an option to allow short exception handling clauses on // a single line. - if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) + // FIXME: This isn't covered by tests. + // FIXME: For catch, __except, __finally the first token on the line + // is '}', so this isn't correct here. 
+ if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, + Keywords.kw___except, tok::kw___finally)) return 0; } @@ -226,7 +363,8 @@ private: } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) && !startsExternCBlock(Line)) { // We don't merge short records. - if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) + if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, + Keywords.kw_interface)) return 0; // Check that we still have three lines and they fit into the limit. @@ -252,6 +390,10 @@ private: if (Tok->isNot(tok::r_brace)) return 0; + // Don't merge "if (a) { .. } else {". + if (Tok->Next && Tok->Next->is(tok::kw_else)) + return 0; + return 2; } return 0; @@ -285,28 +427,367 @@ private: return false; } + void join(AnnotatedLine &A, const AnnotatedLine &B) { + assert(!A.Last->Next); + assert(!B.First->Previous); + if (B.Affected) + A.Affected = true; + A.Last->Next = B.First; + B.First->Previous = A.Last; + B.First->CanBreakBefore = true; + unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; + for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { + Tok->TotalLength += LengthA; + A.Last = Tok; + } + } + const FormatStyle &Style; + const AdditionalKeywords &Keywords; + const SmallVectorImpl<AnnotatedLine*>::const_iterator End; + + SmallVectorImpl<AnnotatedLine*>::const_iterator Next; }; -class NoColumnLimitFormatter { +static void markFinalized(FormatToken *Tok) { + for (; Tok; Tok = Tok->Next) { + Tok->Finalized = true; + for (AnnotatedLine *Child : Tok->Children) + markFinalized(Child->First); + } +} + +#ifndef NDEBUG +static void printLineState(const LineState &State) { + llvm::dbgs() << "State: "; + for (const ParenState &P : State.Stack) { + llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent + << " "; + } + llvm::dbgs() << State.NextToken->TokenText << "\n"; +} +#endif + +/// \brief Base class for classes that format one \c AnnotatedLine. 
+class LineFormatter { public: - NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} + LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), + BlockFormatter(BlockFormatter) {} + virtual ~LineFormatter() {} + + /// \brief Formats an \c AnnotatedLine and returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) = 0; + +protected: + /// \brief If the \p State's next token is an r_brace closing a nested block, + /// format the nested block before it. + /// + /// Returns \c true if all children could be placed successfully and adapts + /// \p Penalty as well as \p State. If \p DryRun is false, also directly + /// creates changes using \c Whitespaces. + /// + /// The crucial idea here is that children always get formatted upon + /// encountering the closing brace right after the nested block. Now, if we + /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is + /// \c false), the entire block has to be kept on the same line (which is only + /// possible if it fits on the line, only contains a single statement, etc. + /// + /// If \p NewLine is true, we format the nested block on separate lines, i.e. + /// break after the "{", format all lines with correct indentation and the put + /// the closing "}" on yet another new line. + /// + /// This enables us to keep the simple structure of the + /// \c UnwrappedLineFormatter, where we only have two options for each token: + /// break or don't break. 
+ bool formatChildren(LineState &State, bool NewLine, bool DryRun, + unsigned &Penalty) { + const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + FormatToken &Previous = *State.NextToken->Previous; + if (!LBrace || LBrace->isNot(tok::l_brace) || + LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) + // The previous token does not open a block. Nothing to do. We don't + // assert so that we can simply call this function for all tokens. + return true; + + if (NewLine) { + int AdditionalIndent = State.Stack.back().Indent - + Previous.Children[0]->Level * Style.IndentWidth; + + Penalty += + BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, + /*FixBadIndentation=*/true); + return true; + } + + if (Previous.Children[0]->First->MustBreakBefore) + return false; + + // Cannot merge multiple statements into a single line. + if (Previous.Children.size() > 1) + return false; + + // Cannot merge into one line if this line ends on a comment. + if (Previous.is(tok::comment)) + return false; + + // We can't put the closing "}" on a line with a trailing comment. + if (Previous.Children[0]->Last->isTrailingComment()) + return false; + + // If the child line exceeds the column limit, we wouldn't want to merge it. + // We add +2 for the trailing " }". 
+ if (Style.ColumnLimit > 0 && + Previous.Children[0]->Last->TotalLength + State.Column + 2 > + Style.ColumnLimit) + return false; + + if (!DryRun) { + Whitespaces->replaceWhitespace( + *Previous.Children[0]->First, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, + /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); + } + Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun); + + State.Column += 1 + Previous.Children[0]->Last->TotalLength; + return true; + } + + ContinuationIndenter *Indenter; + +private: + WhitespaceManager *Whitespaces; + const FormatStyle &Style; + UnwrappedLineFormatter *BlockFormatter; +}; - /// \brief Formats the line starting at \p State, simply keeping all of the - /// input's line breaking decisions. - void format(unsigned FirstIndent, const AnnotatedLine *Line) { +/// \brief Formatter that keeps the existing line breaks. +class NoColumnLimitLineFormatter : public LineFormatter { +public: + NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Formats the line, simply keeping all of the input's line breaking + /// decisions. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) override { + assert(!DryRun); LineState State = - Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); + Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); + unsigned Penalty = 0; + formatChildren(State, Newline, /*DryRun=*/false, Penalty); Indenter->addTokenToState(State, Newline, /*DryRun=*/false); } + return 0; + } +}; + +/// \brief Formatter that puts all tokens into a single line without breaks. 
+class NoLineBreakFormatter : public LineFormatter { +public: + NoLineBreakFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Puts all tokens into a single line. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) { + unsigned Penalty = 0; + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + while (State.NextToken) { + formatChildren(State, /*Newline=*/false, DryRun, Penalty); + Indenter->addTokenToState(State, /*Newline=*/false, DryRun); + } + return Penalty; + } +}; + +/// \brief Finds the best way to break lines. +class OptimizingLineFormatter : public LineFormatter { +public: + OptimizingLineFormatter(ContinuationIndenter *Indenter, + WhitespaceManager *Whitespaces, + const FormatStyle &Style, + UnwrappedLineFormatter *BlockFormatter) + : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + + /// \brief Formats the line by finding the best line breaks with line lengths + /// below the column limit. + unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + bool DryRun) { + LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + + // If the ObjC method declaration does not fit on a line, we should format + // it with one arg per line. + if (State.Line->Type == LT_ObjCMethodDecl) + State.Stack.back().BreakBeforeParameter = true; + + // Find best solution in solution space. + return analyzeSolutionSpace(State, DryRun); } private: - ContinuationIndenter *Indenter; + struct CompareLineStatePointers { + bool operator()(LineState *obj1, LineState *obj2) const { + return *obj1 < *obj2; + } + }; + + /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. + /// + /// In case of equal penalties, we want to prefer states that were inserted + /// first. 
During state generation we make sure that we insert states first + /// that break the line as late as possible. + typedef std::pair<unsigned, unsigned> OrderedPenalty; + + /// \brief An edge in the solution space from \c Previous->State to \c State, + /// inserting a newline dependent on the \c NewLine. + struct StateNode { + StateNode(const LineState &State, bool NewLine, StateNode *Previous) + : State(State), NewLine(NewLine), Previous(Previous) {} + LineState State; + bool NewLine; + StateNode *Previous; + }; + + /// \brief An item in the prioritized BFS search queue. The \c StateNode's + /// \c State has the given \c OrderedPenalty. + typedef std::pair<OrderedPenalty, StateNode *> QueueItem; + + /// \brief The BFS queue type. + typedef std::priority_queue<QueueItem, std::vector<QueueItem>, + std::greater<QueueItem>> QueueType; + + /// \brief Analyze the entire solution space starting from \p InitialState. + /// + /// This implements a variant of Dijkstra's algorithm on the graph that spans + /// the solution space (\c LineStates are the nodes). The algorithm tries to + /// find the shortest path (the one with lowest penalty) from \p InitialState + /// to a state where all tokens are placed. Returns the penalty. + /// + /// If \p DryRun is \c false, directly applies the changes. + unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) { + std::set<LineState *, CompareLineStatePointers> Seen; + + // Increasing count of \c StateNode items we have created. This is used to + // create a deterministic order independent of the container. + unsigned Count = 0; + QueueType Queue; + + // Insert start element into queue. + StateNode *Node = + new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); + Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); + ++Count; + + unsigned Penalty = 0; + + // While not empty, take first element and follow edges. 
+ while (!Queue.empty()) { + Penalty = Queue.top().first.first; + StateNode *Node = Queue.top().second; + if (!Node->State.NextToken) { + DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); + break; + } + Queue.pop(); + + // Cut off the analysis of certain solutions if the analysis gets too + // complex. See description of IgnoreStackForComparison. + if (Count > 10000) + Node->State.IgnoreStackForComparison = true; + + if (!Seen.insert(&Node->State).second) + // State already examined with lower penalty. + continue; + + FormatDecision LastFormat = Node->State.NextToken->Decision; + if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) + addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); + if (LastFormat == FD_Unformatted || LastFormat == FD_Break) + addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); + } + + if (Queue.empty()) { + // We were unable to find a solution, do nothing. + // FIXME: Add diagnostic? + DEBUG(llvm::dbgs() << "Could not find a solution.\n"); + return 0; + } + + // Reconstruct the solution. + if (!DryRun) + reconstructPath(InitialState, Queue.top().second); + + DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); + DEBUG(llvm::dbgs() << "---\n"); + + return Penalty; + } + + /// \brief Add the following state to the analysis queue \c Queue. + /// + /// Assume the current state is \p PreviousNode and has been reached with a + /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 
+ void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, + bool NewLine, unsigned *Count, QueueType *Queue) { + if (NewLine && !Indenter->canBreak(PreviousNode->State)) + return; + if (!NewLine && Indenter->mustBreak(PreviousNode->State)) + return; + + StateNode *Node = new (Allocator.Allocate()) + StateNode(PreviousNode->State, NewLine, PreviousNode); + if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) + return; + + Penalty += Indenter->addTokenToState(Node->State, NewLine, true); + + Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); + ++(*Count); + } + + /// \brief Applies the best formatting by reconstructing the path in the + /// solution space that leads to \c Best. + void reconstructPath(LineState &State, StateNode *Best) { + std::deque<StateNode *> Path; + // We do not need a break before the initial token. + while (Best->Previous) { + Path.push_front(Best); + Best = Best->Previous; + } + for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); + I != E; ++I) { + unsigned Penalty = 0; + formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); + Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); + + DEBUG({ + printLineState((*I)->Previous->State); + if ((*I)->NewLine) { + llvm::dbgs() << "Penalty for placing " + << (*I)->Previous->State.NextToken->Tok.getName() << ": " + << Penalty << "\n"; + } + }); + } + } + + llvm::SpecificBumpPtrAllocator<StateNode> Allocator; }; } // namespace @@ -315,7 +796,7 @@ unsigned UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, int AdditionalIndent, bool FixBadIndentation) { - LineJoiner Joiner(Style); + LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. 
std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( @@ -326,151 +807,93 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, assert(!Lines.empty()); unsigned Penalty = 0; - std::vector<int> IndentForLevel; - for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) - IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); + LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, + AdditionalIndent); const AnnotatedLine *PreviousLine = nullptr; - for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), - E = Lines.end(); - I != E; ++I) { - const AnnotatedLine &TheLine = **I; - const FormatToken *FirstTok = TheLine.First; - int Offset = getIndentOffset(*FirstTok); - - // Determine indent and try to merge multiple unwrapped lines. - unsigned Indent; - if (TheLine.InPPDirective) { - Indent = TheLine.Level * Style.IndentWidth; - } else { - while (IndentForLevel.size() <= TheLine.Level) - IndentForLevel.push_back(-1); - IndentForLevel.resize(TheLine.Level + 1); - Indent = getIndent(IndentForLevel, TheLine.Level); - } - unsigned LevelIndent = Indent; - if (static_cast<int>(Indent) + Offset >= 0) - Indent += Offset; - - // Merge multiple lines if possible. - unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); - if (MergedLines > 0 && Style.ColumnLimit == 0) { - // Disallow line merging if there is a break at the start of one of the - // input lines. 
- for (unsigned i = 0; i < MergedLines; ++i) { - if (I[i + 1]->First->NewlinesBefore > 0) - MergedLines = 0; - } - } - if (!DryRun) { - for (unsigned i = 0; i < MergedLines; ++i) { - join(*I[i], *I[i + 1]); - } - } - I += MergedLines; - + const AnnotatedLine *NextLine = nullptr; + for (const AnnotatedLine *Line = + Joiner.getNextMergedLine(DryRun, IndentTracker); + Line; Line = NextLine) { + const AnnotatedLine &TheLine = *Line; + unsigned Indent = IndentTracker.getIndent(); bool FixIndentation = - FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); - if (TheLine.First->is(tok::eof)) { - if (PreviousLine && PreviousLine->Affected && !DryRun) { - // Remove the file's trailing whitespace. - unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(*TheLine.First, Newlines, - /*IndentLevel=*/0, /*Spaces=*/0, - /*TargetColumn=*/0); - } - } else if (TheLine.Type != LT_Invalid && - (TheLine.Affected || FixIndentation)) { - if (FirstTok->WhitespaceRange.isValid()) { - if (!DryRun) - formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, + FixBadIndentation && (Indent != TheLine.First->OriginalColumn); + bool ShouldFormat = TheLine.Affected || FixIndentation; + // We cannot format this line; if the reason is that the line had a + // parsing error, remember that. 
+ if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat) + *IncompleteFormat = true; + + if (ShouldFormat && TheLine.Type != LT_Invalid) { + if (!DryRun) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, + TheLine.InPPDirective); + + NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); + unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); + bool FitsIntoOneLine = + TheLine.Last->TotalLength + Indent <= ColumnLimit || + TheLine.Type == LT_ImportStatement; + + if (Style.ColumnLimit == 0) + NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + else if (FitsIntoOneLine) + Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + else + Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) + .formatLine(TheLine, Indent, DryRun); + } else { + // If no token in the current line is affected, we still need to format + // affected children. + if (TheLine.ChildrenAffected) + format(TheLine.Children, DryRun); + + // Adapt following lines on the current indent level to the same level + // unless the current \c AnnotatedLine is not at the beginning of a line. + bool StartsNewLine = + TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst; + if (StartsNewLine) + IndentTracker.adjustToUnmodifiedLine(TheLine); + if (!DryRun) { + bool ReformatLeadingWhitespace = + StartsNewLine && ((PreviousLine && PreviousLine->Affected) || + TheLine.LeadingEmptyLinesAffected); + // Format the first token. + if (ReformatLeadingWhitespace) + formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, + TheLine.First->OriginalColumn, TheLine.InPPDirective); - } else { - Indent = LevelIndent = FirstTok->OriginalColumn; - } - - // If everything fits on a single line, just put it there. 
- unsigned ColumnLimit = Style.ColumnLimit; - if (I + 1 != E) { - AnnotatedLine *NextLine = I[1]; - if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) - ColumnLimit = getColumnLimit(TheLine.InPPDirective); - } + else + Whitespaces->addUntouchableToken(*TheLine.First, + TheLine.InPPDirective); - if (TheLine.Last->TotalLength + Indent <= ColumnLimit || - TheLine.Type == LT_ImportStatement) { - LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); - while (State.NextToken) { - formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); - Indenter->addTokenToState(State, /*Newline=*/false, DryRun); - } - } else if (Style.ColumnLimit == 0) { - // FIXME: Implement nested blocks for ColumnLimit = 0. - NoColumnLimitFormatter Formatter(Indenter); - if (!DryRun) - Formatter.format(Indent, &TheLine); - } else { - Penalty += format(TheLine, Indent, DryRun); - } - - if (!TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (TheLine.ChildrenAffected) { - format(TheLine.Children, DryRun); - } else { - // Format the first token if necessary, and notify the WhitespaceManager - // about the unchanged whitespace. - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { - unsigned LevelIndent = Tok->OriginalColumn; - if (!DryRun) { - // Remove trailing whitespace of the previous line. - if ((PreviousLine && PreviousLine->Affected) || - TheLine.LeadingEmptyLinesAffected) { - formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, - TheLine.InPPDirective); - } else { - Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) - IndentForLevel[TheLine.Level] = LevelIndent; - } else if (!DryRun) { + // Notify the WhitespaceManager about the unchanged whitespace. 
+ for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next) Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - } - } - if (!DryRun) { - for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { - Tok->Finalized = true; } + NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); } - PreviousLine = *I; + if (!DryRun) + markFinalized(TheLine.First); + PreviousLine = &TheLine; } PenaltyCache[CacheKey] = Penalty; return Penalty; } -unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line, - unsigned FirstIndent, bool DryRun) { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); - - // If the ObjC method declaration does not fit on a line, we should format - // it with one arg per line. - if (State.Line->Type == LT_ObjCMethodDecl) - State.Stack.back().BreakBeforeParameter = true; - - // Find best solution in solution space. - return analyzeSolutionSpace(State, DryRun); -} - void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, const AnnotatedLine *PreviousLine, unsigned IndentLevel, unsigned Indent, bool InPPDirective) { + if (RootToken.is(tok::eof)) { + unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); + Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0, + /*Spaces=*/0, /*TargetColumn=*/0); + return; + } unsigned Newlines = std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); // Remove empty lines before "}" where applicable. @@ -496,7 +919,8 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, ++Newlines; // Remove empty lines after access specifiers. 
- if (PreviousLine && PreviousLine->First->isAccessSpecifier()) + if (PreviousLine && PreviousLine->First->isAccessSpecifier() && + (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, @@ -504,202 +928,21 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken, !RootToken.HasUnescapedNewline); } -/// \brief Get the indent of \p Level from \p IndentForLevel. -/// -/// \p IndentForLevel must contain the indent for the level \c l -/// at \p IndentForLevel[l], or a value < 0 if the indent for -/// that level is unknown. -unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel, - unsigned Level) { - if (IndentForLevel[Level] != -1) - return IndentForLevel[Level]; - if (Level == 0) - return 0; - return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; -} - -void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) { - assert(!A.Last->Next); - assert(!B.First->Previous); - if (B.Affected) - A.Affected = true; - A.Last->Next = B.First; - B.First->Previous = A.Last; - B.First->CanBreakBefore = true; - unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; - for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { - Tok->TotalLength += LengthA; - A.Last = Tok; - } -} - -unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState, - bool DryRun) { - std::set<LineState *, CompareLineStatePointers> Seen; - - // Increasing count of \c StateNode items we have created. This is used to - // create a deterministic order independent of the container. - unsigned Count = 0; - QueueType Queue; - - // Insert start element into queue. - StateNode *Node = - new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); - Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); - ++Count; - - unsigned Penalty = 0; - - // While not empty, take first element and follow edges. 
- while (!Queue.empty()) { - Penalty = Queue.top().first.first; - StateNode *Node = Queue.top().second; - if (!Node->State.NextToken) { - DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); - break; - } - Queue.pop(); - - // Cut off the analysis of certain solutions if the analysis gets too - // complex. See description of IgnoreStackForComparison. - if (Count > 10000) - Node->State.IgnoreStackForComparison = true; - - if (!Seen.insert(&Node->State).second) - // State already examined with lower penalty. - continue; - - FormatDecision LastFormat = Node->State.NextToken->Decision; - if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) - addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); - if (LastFormat == FD_Unformatted || LastFormat == FD_Break) - addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); - } - - if (Queue.empty()) { - // We were unable to find a solution, do nothing. - // FIXME: Add diagnostic? - DEBUG(llvm::dbgs() << "Could not find a solution.\n"); - return 0; - } - - // Reconstruct the solution. - if (!DryRun) - reconstructPath(InitialState, Queue.top().second); - - DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); - DEBUG(llvm::dbgs() << "---\n"); - - return Penalty; -} - -#ifndef NDEBUG -static void printLineState(const LineState &State) { - llvm::dbgs() << "State: "; - for (const ParenState &P : State.Stack) { - llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent - << " "; - } - llvm::dbgs() << State.NextToken->TokenText << "\n"; -} -#endif - -void UnwrappedLineFormatter::reconstructPath(LineState &State, - StateNode *Current) { - std::deque<StateNode *> Path; - // We do not need a break before the initial token. 
- while (Current->Previous) { - Path.push_front(Current); - Current = Current->Previous; - } - for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); - I != E; ++I) { - unsigned Penalty = 0; - formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); - Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - - DEBUG({ - printLineState((*I)->Previous->State); - if ((*I)->NewLine) { - llvm::dbgs() << "Penalty for placing " - << (*I)->Previous->State.NextToken->Tok.getName() << ": " - << Penalty << "\n"; - } - }); - } -} - -void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty, - StateNode *PreviousNode, - bool NewLine, unsigned *Count, - QueueType *Queue) { - if (NewLine && !Indenter->canBreak(PreviousNode->State)) - return; - if (!NewLine && Indenter->mustBreak(PreviousNode->State)) - return; - - StateNode *Node = new (Allocator.Allocate()) - StateNode(PreviousNode->State, NewLine, PreviousNode); - if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) - return; - - Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - - Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); - ++(*Count); -} - -bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine, - bool DryRun, unsigned &Penalty) { - FormatToken &Previous = *State.NextToken->Previous; - const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); - if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block || - Previous.Children.size() == 0) - // The previous token does not open a block. Nothing to do. We don't - // assert so that we can simply call this function for all tokens. 
- return true; - - if (NewLine) { - int AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; - - Penalty += format(Previous.Children, DryRun, AdditionalIndent, - /*FixBadIndentation=*/true); - return true; - } - - if (Previous.Children[0]->First->MustBreakBefore) - return false; - - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - - // Cannot merge into one line if this line ends on a comment. - if (Previous.is(tok::comment)) - return false; - - // We can't put the closing "}" on a line with a trailing comment. - if (Previous.Children[0]->Last->isTrailingComment()) - return false; - - // If the child line exceeds the column limit, we wouldn't want to merge it. - // We add +2 for the trailing " }". - if (Style.ColumnLimit > 0 && - Previous.Children[0]->Last->TotalLength + State.Column + 2 > - Style.ColumnLimit) - return false; - - if (!DryRun) { - Whitespaces->replaceWhitespace( - *Previous.Children[0]->First, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, - /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); - } - Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); - - State.Column += 1 + Previous.Children[0]->Last->TotalLength; - return true; +unsigned +UnwrappedLineFormatter::getColumnLimit(bool InPPDirective, + const AnnotatedLine *NextLine) const { + // In preprocessor directives reserve two chars for trailing " \" if the + // next line continues the preprocessor directive. + bool ContinuesPPDirective = + InPPDirective && + // If there is no next line, this is likely a child line and the parent + // continues the preprocessor directive. + (!NextLine || + (NextLine->InPPDirective && + // If there is an unescaped newline between this line and the next, the + // next line starts a new preprocessor directive. + !NextLine->First->HasUnescapedNewline)); + return Style.ColumnLimit - (ContinuesPPDirective ? 
2 : 0); } } // namespace format diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 3ae6dbc4db0b3..da9aa1c605e4c 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -32,135 +32,39 @@ class UnwrappedLineFormatter { public: UnwrappedLineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, - const FormatStyle &Style) - : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style) {} + const FormatStyle &Style, + const AdditionalKeywords &Keywords, + bool *IncompleteFormat) + : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), + Keywords(Keywords), IncompleteFormat(IncompleteFormat) {} - unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, - int AdditionalIndent = 0, bool FixBadIndentation = false); + /// \brief Format the current block and return the penalty. + unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, + bool DryRun = false, int AdditionalIndent = 0, + bool FixBadIndentation = false); private: - /// \brief Formats an \c AnnotatedLine and returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. - unsigned format(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun); - - /// \brief An edge in the solution space from \c Previous->State to \c State, - /// inserting a newline dependent on the \c NewLine. - struct StateNode { - StateNode(const LineState &State, bool NewLine, StateNode *Previous) - : State(State), NewLine(NewLine), Previous(Previous) {} - LineState State; - bool NewLine; - StateNode *Previous; - }; - - /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. - /// - /// In case of equal penalties, we want to prefer states that were inserted - /// first. During state generation we make sure that we insert states first - /// that break the line as late as possible. 
- typedef std::pair<unsigned, unsigned> OrderedPenalty; - - /// \brief An item in the prioritized BFS search queue. The \c StateNode's - /// \c State has the given \c OrderedPenalty. - typedef std::pair<OrderedPenalty, StateNode *> QueueItem; - - /// \brief The BFS queue type. - typedef std::priority_queue<QueueItem, std::vector<QueueItem>, - std::greater<QueueItem> > QueueType; - - /// \brief Get the offset of the line relatively to the level. - /// - /// For example, 'public:' labels in classes are offset by 1 or 2 - /// characters to the left from their level. - int getIndentOffset(const FormatToken &RootToken) { - if (Style.Language == FormatStyle::LK_Java) - return 0; - if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) - return Style.AccessModifierOffset; - return 0; - } - /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. void formatFirstToken(FormatToken &RootToken, const AnnotatedLine *PreviousLine, unsigned IndentLevel, unsigned Indent, bool InPPDirective); - /// \brief Get the indent of \p Level from \p IndentForLevel. - /// - /// \p IndentForLevel must contain the indent for the level \c l - /// at \p IndentForLevel[l], or a value < 0 if the indent for - /// that level is unknown. - unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level); - - void join(AnnotatedLine &A, const AnnotatedLine &B); - - unsigned getColumnLimit(bool InPPDirective) const { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (InPPDirective ? 2 : 0); - } - - struct CompareLineStatePointers { - bool operator()(LineState *obj1, LineState *obj2) const { - return *obj1 < *obj2; - } - }; - - /// \brief Analyze the entire solution space starting from \p InitialState. - /// - /// This implements a variant of Dijkstra's algorithm on the graph that spans - /// the solution space (\c LineStates are the nodes). 
The algorithm tries to - /// find the shortest path (the one with lowest penalty) from \p InitialState - /// to a state where all tokens are placed. Returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. - unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false); - - void reconstructPath(LineState &State, StateNode *Current); - - /// \brief Add the following state to the analysis queue \c Queue. - /// - /// Assume the current state is \p PreviousNode and has been reached with a - /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. - void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, - bool NewLine, unsigned *Count, QueueType *Queue); - - /// \brief If the \p State's next token is an r_brace closing a nested block, - /// format the nested block before it. - /// - /// Returns \c true if all children could be placed successfully and adapts - /// \p Penalty as well as \p State. If \p DryRun is false, also directly - /// creates changes using \c Whitespaces. - /// - /// The crucial idea here is that children always get formatted upon - /// encountering the closing brace right after the nested block. Now, if we - /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is - /// \c false), the entire block has to be kept on the same line (which is only - /// possible if it fits on the line, only contains a single statement, etc. - /// - /// If \p NewLine is true, we format the nested block on separate lines, i.e. - /// break after the "{", format all lines with correct indentation and the put - /// the closing "}" on yet another new line. - /// - /// This enables us to keep the simple structure of the - /// \c UnwrappedLineFormatter, where we only have two options for each token: - /// break or don't break. 
- bool formatChildren(LineState &State, bool NewLine, bool DryRun, - unsigned &Penalty); - - ContinuationIndenter *Indenter; - WhitespaceManager *Whitespaces; - FormatStyle Style; - - llvm::SpecificBumpPtrAllocator<StateNode> Allocator; + /// \brief Returns the column limit for a line, taking into account whether we + /// need an escaped newline due to a continued preprocessor directive. + unsigned getColumnLimit(bool InPPDirective, const AnnotatedLine *NextLine) const; // Cache to store the penalty of formatting a vector of AnnotatedLines // starting from a specific additional offset. Improves performance if there // are many nested blocks. std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, unsigned> PenaltyCache; + + ContinuationIndenter *Indenter; + WhitespaceManager *Whitespaces; + const FormatStyle &Style; + const AdditionalKeywords &Keywords; + bool *IncompleteFormat; }; } // end namespace format } // end namespace clang diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index ec04af5231be7..939528fbffe55 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -14,7 +14,9 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineParser.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "format-parser" @@ -56,22 +58,20 @@ private: class ScopedMacroState : public FormatTokenSource { public: ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, - FormatToken *&ResetToken, bool &StructuralError) + FormatToken *&ResetToken) : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), - StructuralError(StructuralError), - PreviousStructuralError(StructuralError), Token(nullptr) { + Token(nullptr) { TokenSource = this; Line.Level = 0; Line.InPPDirective = true; } - 
~ScopedMacroState() { + ~ScopedMacroState() override { TokenSource = PreviousTokenSource; ResetToken = Token; Line.InPPDirective = false; Line.Level = PreviousLineLevel; - StructuralError = PreviousStructuralError; } FormatToken *getNextToken() override { @@ -110,8 +110,6 @@ private: FormatToken *&ResetToken; unsigned PreviousLineLevel; FormatTokenSource *PreviousTokenSource; - bool &StructuralError; - bool PreviousStructuralError; FormatToken *Token; }; @@ -206,9 +204,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), StructuralError(false), Style(Style), - Keywords(Keywords), Tokens(nullptr), Callback(Callback), - AllTokens(Tokens), PPBranchLevel(-1) {} + CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), + Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -219,11 +216,10 @@ void UnwrappedLineParser::reset() { PreprocessorDirectives.clear(); CurrentLines = &Lines; DeclarationScopeStack.clear(); - StructuralError = false; PPStack.clear(); } -bool UnwrappedLineParser::parse() { +void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); do { DEBUG(llvm::dbgs() << "----\n"); @@ -256,13 +252,15 @@ bool UnwrappedLineParser::parse() { } } while (!PPLevelBranchIndex.empty()); - return StructuralError; } void UnwrappedLineParser::parseFile() { - ScopedDeclarationState DeclarationState( - *Line, DeclarationScopeStack, - /*MustBeDeclaration=*/ !Line->InPPDirective); + // The top-level context in a file always has declarations, except for pre- + // processor directives and JavaScript files. 
+ bool MustBeDeclaration = + !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, + MustBeDeclaration); parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. flushComments(true); @@ -286,7 +284,6 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { case tok::r_brace: if (HasOpeningBrace) return; - StructuralError = true; nextToken(); addUnwrappedLine(); break; @@ -305,7 +302,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { } while (!eof()); } -void UnwrappedLineParser::calculateBraceTypes() { +void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // We'll parse forward through the tokens until we hit // a closing brace or eof - note that getNextToken() will // parse macros, so this will magically work inside macro @@ -328,6 +325,7 @@ void UnwrappedLineParser::calculateBraceTypes() { switch (Tok->Tok.getKind()) { case tok::l_brace: + Tok->BlockKind = BK_Unknown; LBraceStack.push_back(Tok); break; case tok::r_brace: @@ -351,9 +349,11 @@ void UnwrappedLineParser::calculateBraceTypes() { // // We exclude + and - as they can be ObjC visibility modifiers. 
ProbablyBracedList = - NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, + NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, tok::l_paren, tok::ellipsis) || + (NextTok->is(tok::semi) && + (!ExpectClassBody || LBraceStack.size() != 1)) || (NextTok->isBinaryOperator() && !NextIsObjCMethod); } if (ProbablyBracedList) { @@ -374,6 +374,7 @@ void UnwrappedLineParser::calculateBraceTypes() { case tok::kw_for: case tok::kw_switch: case tok::kw_try: + case tok::kw___try: if (!LBraceStack.empty()) LBraceStack.back()->BlockKind = BK_Block; break; @@ -407,7 +408,6 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (!FormatTok->Tok.is(tok::r_brace)) { Line->Level = InitialLevel; - StructuralError = true; return; } @@ -417,7 +417,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, Line->Level = InitialLevel; } -static bool IsGoogScope(const UnwrappedLine &Line) { +static bool isGoogScope(const UnwrappedLine &Line) { // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. if (Line.Tokens.size() < 4) @@ -453,12 +453,13 @@ void UnwrappedLineParser::parseChildBlock() { nextToken(); { bool GoogScope = - Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line); + Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); Line->Level += GoogScope ? 0 : 1; parseLevel(/*HasOpeningBrace=*/true); + flushComments(isOnNewLine(*FormatTok)); Line->Level -= GoogScope ? 
0 : 1; } nextToken(); @@ -466,7 +467,7 @@ void UnwrappedLineParser::parseChildBlock() { void UnwrappedLineParser::parsePPDirective() { assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); - ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); + ScopedMacroState MacroState(*Line, Tokens, FormatTok); nextToken(); if (!FormatTok->Tok.getIdentifierInfo()) { @@ -549,6 +550,7 @@ void UnwrappedLineParser::conditionalCompilationEnd() { void UnwrappedLineParser::parsePPIf(bool IfDef) { nextToken(); bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && + FormatTok->Tok.getLiteralData() != nullptr && StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == "0") || FormatTok->Tok.is(tok::kw_false); @@ -602,7 +604,7 @@ void UnwrappedLineParser::parsePPUnknown() { // Here we blacklist certain tokens that are not usually the first token in an // unwrapped line. This is used in attempt to distinguish macro calls without // trailing semicolons from other constructs split to several lines. -bool tokenCanStartNewLine(clang::Token Tok) { +static bool tokenCanStartNewLine(const clang::Token &Tok) { // Semicolon can be a null-statement, l_square can be a start of a macro or // a C++11 attribute, but this doesn't seem to be common. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && @@ -655,6 +657,11 @@ void UnwrappedLineParser::parseStructuralElement() { nextToken(); addUnwrappedLine(); return; + case tok::objc_try: + // This branch isn't strictly necessary (the kw_try case below would + // do this too after the tok::at is parsed above). But be explicit. 
+ parseTryCatch(); + return; default: break; } @@ -662,10 +669,13 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { + FormatTok->Type = TT_InlineASMBrace; nextToken(); while (FormatTok && FormatTok->isNot(tok::eof)) { if (FormatTok->is(tok::r_brace)) { + FormatTok->Type = TT_InlineASMBrace; nextToken(); + addUnwrappedLine(); break; } FormatTok->Finalized = true; @@ -686,7 +696,8 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::kw_public: case tok::kw_protected: case tok::kw_private: - if (Style.Language == FormatStyle::LK_Java) + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) nextToken(); else parseAccessSpecifier(); @@ -712,6 +723,7 @@ void UnwrappedLineParser::parseStructuralElement() { parseCaseLabel(); return; case tok::kw_try: + case tok::kw___try: parseTryCatch(); return; case tok::kw_extern: @@ -725,11 +737,30 @@ void UnwrappedLineParser::parseStructuralElement() { } } break; + case tok::kw_export: + if (Style.Language == FormatStyle::LK_JavaScript) { + parseJavaScriptEs6ImportExport(); + return; + } + break; case tok::identifier: - if (FormatTok->IsForEachMacro) { + if (FormatTok->is(TT_ForEachMacro)) { parseForOrWhileLoop(); return; } + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_import)) { + parseJavaScriptEs6ImportExport(); + return; + } + if (FormatTok->is(Keywords.kw_signals)) { + nextToken(); + if (FormatTok->is(tok::colon)) { + nextToken(); + addUnwrappedLine(); + } + return; + } // In all other cases, parse the declaration. break; default: @@ -806,26 +837,42 @@ void UnwrappedLineParser::parseStructuralElement() { parseTryCatch(); return; case tok::identifier: { - StringRef Text = FormatTok->TokenText; // Parse function literal unless 'function' is the first token in a line // in which case this should be treated as a free-standing function. 
- if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" && - Line->Tokens.size() > 0) { + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) { tryToParseJSFunction(); break; } + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(Keywords.kw_interface)) { + parseRecord(); + break; + } + + StringRef Text = FormatTok->TokenText; nextToken(); - if (Line->Tokens.size() == 1) { - if (FormatTok->Tok.is(tok::colon)) { + if (Line->Tokens.size() == 1 && + // JS doesn't have macros, and within classes colons indicate fields, + // not labels. + Style.Language != FormatStyle::LK_JavaScript) { + if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { parseLabel(); return; } // Recognize function-like macro usages without trailing semicolon as - // well as free-standing macrose like Q_OBJECT. + // well as free-standing macros like Q_OBJECT. bool FunctionLike = FormatTok->is(tok::l_paren); if (FunctionLike) parseParens(); - if (FormatTok->NewlinesBefore > 0 && + + bool FollowedByNewline = + CommentsBeforeNextToken.empty() + ? FormatTok->NewlinesBefore > 0 + : CommentsBeforeNextToken.front()->NewlinesBefore > 0; + + if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { addUnwrappedLine(); @@ -835,6 +882,17 @@ void UnwrappedLineParser::parseStructuralElement() { break; } case tok::equal: + // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType + // TT_JsFatArrow. The always start an expression or a child block if + // followed by a curly. 
+ if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + if (FormatTok->is(tok::l_brace)) { + parseChildBlock(); + } + break; + } + nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { parseBracedList(); @@ -843,6 +901,9 @@ void UnwrappedLineParser::parseStructuralElement() { case tok::l_square: parseSquare(); break; + case tok::kw_new: + parseNew(); + break; default: nextToken(); break; @@ -952,22 +1013,48 @@ void UnwrappedLineParser::tryToParseJSFunction() { // Consume function name. if (FormatTok->is(tok::identifier)) - nextToken(); + nextToken(); if (FormatTok->isNot(tok::l_paren)) return; - nextToken(); - while (FormatTok->isNot(tok::l_brace)) { - // Err on the side of caution in order to avoid consuming the full file in - // case of incomplete code. - if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, - tok::comment)) - return; + + // Parse formal parameter list. + parseBalanced(tok::l_paren, tok::r_paren); + + if (FormatTok->is(tok::colon)) { + // Parse a type definition. nextToken(); + + // Eat the type declaration. For braced inline object types, balance braces, + // otherwise just parse until finding an l_brace for the function body. + if (FormatTok->is(tok::l_brace)) { + parseBalanced(tok::l_brace, tok::r_brace); + } else { + while(FormatTok->isNot(tok::l_brace) && !eof()) { + nextToken(); + } + } } + parseChildBlock(); } +void UnwrappedLineParser::parseBalanced(tok::TokenKind OpenKind, + tok::TokenKind CloseKind) { + assert(FormatTok->is(OpenKind)); + nextToken(); + int Depth = 1; + while (Depth > 0 && !eof()) { + // Parse the formal parameter list. 
+ if (FormatTok->is(OpenKind)) { + ++Depth; + } else if (FormatTok->is(CloseKind)) { + --Depth; + } + nextToken(); + } +} + bool UnwrappedLineParser::tryToParseBracedList() { if (FormatTok->BlockKind == BK_Unknown) calculateBraceTypes(); @@ -985,10 +1072,19 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. do { - if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->is(Keywords.kw_function)) { - tryToParseJSFunction(); - continue; + if (Style.Language == FormatStyle::LK_JavaScript) { + if (FormatTok->is(Keywords.kw_function)) { + tryToParseJSFunction(); + continue; + } else if (FormatTok->is(TT_JsFatArrow)) { + nextToken(); + // Fat arrows can be followed by simple expressions or by child blocks + // in curly braces. + if (FormatTok->is(tok::l_brace)){ + parseChildBlock(); + continue; + } + } } switch (FormatTok->Tok.getKind()) { case tok::caret: @@ -1006,6 +1102,17 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { FormatTok->BlockKind = BK_BracedInit; parseBracedList(); break; + case tok::r_paren: + // JavaScript can just have free standing methods and getters/setters in + // object literals. Detect them by a "{" following ")". 
+ if (Style.Language == FormatStyle::LK_JavaScript) { + nextToken(); + if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + break; + } + nextToken(); + break; case tok::r_brace: nextToken(); return !HasError; @@ -1046,9 +1153,8 @@ void UnwrappedLineParser::parseParens() { tryToParseLambda(); break; case tok::l_brace: - if (!tryToParseBracedList()) { + if (!tryToParseBracedList()) parseChildBlock(); - } break; case tok::at: nextToken(); @@ -1088,9 +1194,8 @@ void UnwrappedLineParser::parseSquare() { parseSquare(); break; case tok::l_brace: { - if (!tryToParseBracedList()) { + if (!tryToParseBracedList()) parseChildBlock(); - } break; } case tok::at: @@ -1148,7 +1253,7 @@ void UnwrappedLineParser::parseIfThenElse() { } void UnwrappedLineParser::parseTryCatch() { - assert(FormatTok->is(tok::kw_try) && "'try' expected"); + assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); nextToken(); bool NeedsUnwrappedLine = false; if (FormatTok->is(tok::colon)) { @@ -1158,8 +1263,6 @@ void UnwrappedLineParser::parseTryCatch() { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); - else - StructuralError = true; if (FormatTok->is(tok::comma)) nextToken(); } @@ -1182,23 +1285,29 @@ void UnwrappedLineParser::parseTryCatch() { // The C++ standard requires a compound-statement after a try. // If there's none, we try to assume there's a structuralElement // and try to continue. 
- StructuralError = true; addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } - while (FormatTok->is(tok::kw_catch) || - ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - FormatTok->is(Keywords.kw_finally))) { + while (1) { + if (FormatTok->is(tok::at)) + nextToken(); + if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, + tok::kw___finally) || + ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + FormatTok->is(Keywords.kw_finally)) || + (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) + break; nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { parseParens(); continue; } - if (FormatTok->isOneOf(tok::semi, tok::r_brace)) + if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) return; nextToken(); } @@ -1242,9 +1351,33 @@ void UnwrappedLineParser::parseNamespace() { // FIXME: Add error handling. } +void UnwrappedLineParser::parseNew() { + assert(FormatTok->is(tok::kw_new) && "'new' expected"); + nextToken(); + if (Style.Language != FormatStyle::LK_Java) + return; + + // In Java, we can parse everything up to the parens, which aren't optional. + do { + // There should not be a ;, { or } before the new's open paren. + if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) + return; + + // Consume the parens. + if (FormatTok->is(tok::l_paren)) { + parseParens(); + + // If there is a class body of an anonymous class, consume that as child. 
+ if (FormatTok->is(tok::l_brace)) + parseChildBlock(); + return; + } + nextToken(); + } while (!eof()); +} + void UnwrappedLineParser::parseForOrWhileLoop() { - assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) || - FormatTok->IsForEachMacro) && + assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && "'for', 'while' or foreach macro expected"); nextToken(); if (FormatTok->Tok.is(tok::l_paren)) @@ -1304,6 +1437,8 @@ void UnwrappedLineParser::parseLabel() { } addUnwrappedLine(); } else { + if (FormatTok->is(tok::semi)) + nextToken(); addUnwrappedLine(); } Line->Level = OldLineLevel; @@ -1338,8 +1473,7 @@ void UnwrappedLineParser::parseSwitch() { void UnwrappedLineParser::parseAccessSpecifier() { nextToken(); // Understand Qt's slots. - if (FormatTok->is(tok::identifier) && - (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS")) + if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. if (FormatTok->Tok.is(tok::colon)) @@ -1455,37 +1589,45 @@ void UnwrappedLineParser::parseJavaEnumBody() { void UnwrappedLineParser::parseRecord() { const FormatToken &InitialToken = *FormatTok; nextToken(); - if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, - tok::kw___declspec, tok::kw_alignas)) { + + + // The actual identifier can be a nested name specifier, and in macros + // it is often token-pasted. + while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, + tok::kw___attribute, tok::kw___declspec, + tok::kw_alignas) || + ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) && + FormatTok->isOneOf(tok::period, tok::comma))) { + bool IsNonMacroIdentifier = + FormatTok->is(tok::identifier) && + FormatTok->TokenText != FormatTok->TokenText.upper(); nextToken(); // We can have macros or attributes in between 'class' and the class name. 
- if (FormatTok->Tok.is(tok::l_paren)) { + if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) parseParens(); - } - // The actual identifier can be a nested name specifier, and in macros - // it is often token-pasted. - while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) || - FormatTok->is(tok::hashhash) || - (Style.Language == FormatStyle::LK_Java && - FormatTok->isOneOf(tok::period, tok::comma))) - nextToken(); + } - // Note that parsing away template declarations here leads to incorrectly - // accepting function declarations as record declarations. - // In general, we cannot solve this problem. Consider: - // class A<int> B() {} - // which can be a function definition or a class definition when B() is a - // macro. If we find enough real-world cases where this is a problem, we - // can parse for the 'template' keyword in the beginning of the statement, - // and thus rule out the record production in case there is no template - // (this would still leave us with an ambiguity between template function - // and class declarations). - if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { - while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { - if (FormatTok->Tok.is(tok::semi)) - return; - nextToken(); + // Note that parsing away template declarations here leads to incorrectly + // accepting function declarations as record declarations. + // In general, we cannot solve this problem. Consider: + // class A<int> B() {} + // which can be a function definition or a class definition when B() is a + // macro. If we find enough real-world cases where this is a problem, we + // can parse for the 'template' keyword in the beginning of the statement, + // and thus rule out the record production in case there is no template + // (this would still leave us with an ambiguity between template function + // and class declarations). 
+ if (FormatTok->isOneOf(tok::colon, tok::less)) { + while (!eof()) { + if (FormatTok->is(tok::l_brace)) { + calculateBraceTypes(/*ExpectClassBody=*/true); + if (!tryToParseBracedList()) + break; } + if (FormatTok->Tok.is(tok::semi)) + return; + nextToken(); } } if (FormatTok->Tok.is(tok::l_brace)) { @@ -1498,8 +1640,9 @@ void UnwrappedLineParser::parseRecord() { // We fall through to parsing a structural element afterwards, so // class A {} n, m; // will end up in one unwrapped line. - // This does not apply for Java. - if (Style.Language == FormatStyle::LK_Java) + // This does not apply for Java and JavaScript. + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript) addUnwrappedLine(); } @@ -1578,6 +1721,35 @@ void UnwrappedLineParser::parseObjCProtocol() { parseObjCUntilAtEnd(); } +void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { + assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export)); + nextToken(); + + // Consume the "default" in "export default class/function". + if (FormatTok->is(tok::kw_default)) + nextToken(); + + // Consume "function" and "default function", so that these get parsed as + // free-standing JS functions, i.e. do not require a trailing semicolon. + if (FormatTok->is(Keywords.kw_function)) { + nextToken(); + return; + } + + if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_var)) + return; // Fall through to parsing the corresponding structure. 
+ + if (FormatTok->is(tok::l_brace)) { + FormatTok->BlockKind = BK_Block; + parseBracedList(); + } + + while (!eof() && FormatTok->isNot(tok::semi) && + FormatTok->isNot(tok::l_brace)) { + nextToken(); + } +} + LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" @@ -1634,14 +1806,12 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (isOnNewLine(**I) && JustComments) { + if (isOnNewLine(**I) && JustComments) addUnwrappedLine(); - } pushToken(*I); } - if (NewlineBeforeNext && JustComments) { + if (NewlineBeforeNext && JustComments) addUnwrappedLine(); - } CommentsBeforeNextToken.clear(); } @@ -1662,8 +1832,7 @@ void UnwrappedLineParser::readToken() { (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. - bool SwitchToPreprocessorLines = - !Line->Tokens.empty() && CurrentLines == &Lines; + bool SwitchToPreprocessorLines = !Line->Tokens.empty(); ScopedLineState BlockState(*this, SwitchToPreprocessorLines); // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 3218afecad309..6a6e56fea0280 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -65,8 +65,7 @@ public: ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); - /// Returns true in case of a structural error. 
- bool parse(); + void parse(); private: void reset(); @@ -95,6 +94,7 @@ private: void parseCaseLabel(); void parseSwitch(); void parseNamespace(); + void parseNew(); void parseAccessSpecifier(); void parseEnum(); void parseJavaEnumBody(); @@ -103,16 +103,22 @@ private: void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); void parseObjCProtocol(); + void parseJavaScriptEs6ImportExport(); bool tryToParseLambda(); bool tryToParseLambdaIntroducer(); void tryToParseJSFunction(); + /// \brief Parses tokens until encountering the CloseKind token, but balances + /// tokens when encountering more OpenKind tokens. Useful for e.g. parsing a + /// curly brace delimited block that can contain nested blocks. + /// The parser must be positioned on a token of OpenKind. + void parseBalanced(tok::TokenKind OpenKind, tok::TokenKind CloseKind); void addUnwrappedLine(); bool eof() const; void nextToken(); void readToken(); void flushComments(bool NewlineBeforeNext); void pushToken(FormatToken *Tok); - void calculateBraceTypes(); + void calculateBraceTypes(bool ExpectClassBody = false); // Marks a conditional compilation edge (for example, an '#if', '#ifdef', // '#else' or merge conflict marker). If 'Unreachable' is true, assumes @@ -156,10 +162,6 @@ private: // whether we are in a compound statement or not. std::vector<bool> DeclarationScopeStack; - // Will be true if we encounter an error that leads to possibily incorrect - // indentation levels. 
- bool StructuralError; - const FormatStyle &Style; const AdditionalKeywords &Keywords; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index bf1207e59c902..4baaab1c9877f 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -36,7 +36,9 @@ WhitespaceManager::Change::Change( PreviousLinePostfix(PreviousLinePostfix), CurrentLinePrefix(CurrentLinePrefix), Kind(Kind), ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel), - Spaces(Spaces) {} + Spaces(Spaces), IsTrailingComment(false), TokenLength(0), + PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), + StartOfBlockComment(nullptr), IndentationOffset(0) {} void WhitespaceManager::reset() { Changes.clear(); @@ -91,6 +93,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); calculateLineBreakInformation(); + alignConsecutiveAssignments(); alignTrailingComments(); alignEscapedNewlines(); generateChanges(); @@ -139,6 +142,96 @@ void WhitespaceManager::calculateLineBreakInformation() { } } +// Walk through all of the changes and find sequences of "=" to align. To do +// so, keep track of the lines and whether or not an "=" was found on a line. If +// a "=" is found on a line, extend the current sequence. If the current line +// cannot be part of a sequence, e.g. because there is an empty line before it +// or it contains non-assignments, finalize the previous sequence. 
+void WhitespaceManager::alignConsecutiveAssignments() { + if (!Style.AlignConsecutiveAssignments) + return; + + unsigned MinColumn = 0; + unsigned StartOfSequence = 0; + unsigned EndOfSequence = 0; + bool FoundAssignmentOnLine = false; + bool FoundLeftParenOnLine = false; + unsigned CurrentLine = 0; + + auto AlignSequence = [&] { + alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn); + MinColumn = 0; + StartOfSequence = 0; + EndOfSequence = 0; + }; + + for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + if (Changes[i].NewlinesBefore != 0) { + CurrentLine += Changes[i].NewlinesBefore; + if (StartOfSequence > 0 && + (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine)) { + EndOfSequence = i; + AlignSequence(); + } + FoundAssignmentOnLine = false; + FoundLeftParenOnLine = false; + } + + if ((Changes[i].Kind == tok::equal && + (FoundAssignmentOnLine || ((Changes[i].NewlinesBefore > 0 || + Changes[i + 1].NewlinesBefore > 0)))) || + (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren)) { + if (StartOfSequence > 0) + AlignSequence(); + } else if (Changes[i].Kind == tok::l_paren) { + FoundLeftParenOnLine = true; + if (!FoundAssignmentOnLine && StartOfSequence > 0) + AlignSequence(); + } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine && + Changes[i].Kind == tok::equal) { + FoundAssignmentOnLine = true; + EndOfSequence = i; + if (StartOfSequence == 0) + StartOfSequence = i; + + unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; + MinColumn = std::max(MinColumn, ChangeMinColumn); + } + } + + if (StartOfSequence > 0) { + EndOfSequence = Changes.size(); + AlignSequence(); + } +} + +void WhitespaceManager::alignConsecutiveAssignments(unsigned Start, + unsigned End, + unsigned Column) { + bool AlignedAssignment = false; + int PreviousShift = 0; + for (unsigned i = Start; i != End; ++i) { + int Shift = 0; + if (Changes[i].NewlinesBefore > 0) + AlignedAssignment = false; + if (!AlignedAssignment && Changes[i].Kind == tok::equal) 
{ + Shift = Column - Changes[i].StartOfTokenColumn; + AlignedAssignment = true; + PreviousShift = Shift; + } + assert(Shift >= 0); + Changes[i].Spaces += Shift; + if (i + 1 != Changes.size()) + Changes[i + 1].PreviousEndOfTokenColumn += Shift; + Changes[i].StartOfTokenColumn += Shift; + if (AlignedAssignment) { + Changes[i].StartOfTokenColumn += PreviousShift; + if (i + 1 != Changes.size()) + Changes[i + 1].PreviousEndOfTokenColumn += PreviousShift; + } + } +} + void WhitespaceManager::alignTrailingComments() { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -264,6 +357,11 @@ void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End, void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; + if (i > 0) { + assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() != + C.OriginalWhitespaceRange.getBegin() && + "Generating two replacements for the same location"); + } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; if (C.ContinuesPPDirective) diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 28730d457eba8..4bfc813b2c349 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -164,6 +164,13 @@ private: /// \c EscapedNewlineColumn for the first tokens or token parts in a line. void calculateLineBreakInformation(); + /// \brief Align consecutive assignments over all \c Changes. + void alignConsecutiveAssignments(); + + /// \brief Align consecutive assignments from change \p Start to change \p End at + /// the specified \p Column. + void alignConsecutiveAssignments(unsigned Start, unsigned End, unsigned Column); + /// \brief Align trailing comments over all \c Changes. void alignTrailingComments(); |