summaryrefslogtreecommitdiff
path: root/lib/Format
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-05-27 18:47:56 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-05-27 18:47:56 +0000
commit5e20cdd81c44a443562a09007668ffdf76c455af (patch)
treedbbd4047878da71c1a706e26ce05b4e7791b14cc /lib/Format
parentd5f23b0b7528b5c3caed1ba14f897cc4aaa9e3c3 (diff)
Notes
Diffstat (limited to 'lib/Format')
-rw-r--r--lib/Format/BreakableToken.cpp18
-rw-r--r--lib/Format/ContinuationIndenter.cpp184
-rw-r--r--lib/Format/Format.cpp292
-rw-r--r--lib/Format/FormatToken.cpp55
-rw-r--r--lib/Format/FormatToken.h194
-rw-r--r--lib/Format/TokenAnnotator.cpp433
-rw-r--r--lib/Format/TokenAnnotator.h22
-rw-r--r--lib/Format/UnwrappedLineFormatter.cpp941
-rw-r--r--lib/Format/UnwrappedLineFormatter.h132
-rw-r--r--lib/Format/UnwrappedLineParser.cpp361
-rw-r--r--lib/Format/UnwrappedLineParser.h16
-rw-r--r--lib/Format/WhitespaceManager.cpp100
-rw-r--r--lib/Format/WhitespaceManager.h7
13 files changed, 1788 insertions, 967 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 26f1371b4092e..66e935abdf558 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -106,7 +106,7 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
}
- if (Chars > MaxSplit || Text.size() == Advance)
+ if (Chars > MaxSplit || Text.size() <= Advance)
break;
if (IsBlank(Text[0]))
@@ -277,6 +277,8 @@ BreakableBlockComment::BreakableBlockComment(
// If the last line is empty, the closing "*/" will have a star.
if (i + 1 == e && Lines[i].empty())
break;
+ if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+ continue;
while (!Lines[i].startswith(Decoration))
Decoration = Decoration.substr(0, Decoration.size() - 1);
}
@@ -297,14 +299,18 @@ BreakableBlockComment::BreakableBlockComment(
}
continue;
}
+
// The first line already excludes the star.
// For all other lines, adjust the line to exclude the star and
// (optionally) the first whitespace.
- StartOfLineColumn[i] += Decoration.size();
- Lines[i] = Lines[i].substr(Decoration.size());
- LeadingWhitespace[i] += Decoration.size();
- IndentAtLineBreak =
- std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+ unsigned DecorationSize =
+ Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
+ StartOfLineColumn[i] += DecorationSize;
+ Lines[i] = Lines[i].substr(DecorationSize);
+ LeadingWhitespace[i] += DecorationSize;
+ if (!Decoration.startswith(Lines[i]))
+ IndentAtLineBreak =
+ std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
}
IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
DEBUG({
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 4cc92b02a9e58..4e8f5af263d2f 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -143,11 +143,10 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
return true;
if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
- (Style.BreakBeforeTernaryOperators &&
- (Current.is(tok::question) ||
- (Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)))) ||
+ (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
+ Previous.isNot(tok::question)) ||
(!Style.BreakBeforeTernaryOperators &&
- (Previous.is(tok::question) || Previous.is(TT_ConditionalExpr)))) &&
+ Previous.is(TT_ConditionalExpr))) &&
State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() &&
!Current.isOneOf(tok::r_paren, tok::r_brace))
return true;
@@ -166,10 +165,17 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) ||
Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0))
return true;
+ if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
if (State.Column < getNewLineColumn(State))
return false;
- if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None) {
+
+ // Using CanBreakBefore here and below takes care of the decision whether the
+ // current style uses wrapping before or after operators for the given
+ // operator.
+ if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) {
// If we need to break somewhere inside the LHS of a binary expression, we
// should also break after the operator. Otherwise, the formatting would
// hide the operator precedence, e.g. in:
@@ -185,16 +191,13 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
Previous.Previous->isNot(TT_BinaryOperator); // For >>.
bool LHSIsBinaryExpr =
Previous.Previous && Previous.Previous->EndsBinaryExpression;
- if (Previous.is(TT_BinaryOperator) && (!IsComparison || LHSIsBinaryExpr) &&
- Current.isNot(TT_BinaryOperator) && // For >>.
- !Current.isTrailingComment() && !Previous.is(tok::lessless) &&
+ if ((!IsComparison || LHSIsBinaryExpr) && !Current.isTrailingComment() &&
Previous.getPrecedence() != prec::Assignment &&
State.Stack.back().BreakBeforeParameter)
return true;
- } else {
- if (Current.is(TT_BinaryOperator) && Previous.EndsBinaryExpression &&
- State.Stack.back().BreakBeforeParameter)
- return true;
+ } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
+ State.Stack.back().BreakBeforeParameter) {
+ return true;
}
// Same as above, but for the first "<<" operator.
@@ -203,12 +206,14 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
State.Stack.back().FirstLessLess == 0)
return true;
- if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound &&
- State.Stack.back().BreakBeforeParameter)
- return true;
if (Current.NestingLevel == 0 && !Current.isTrailingComment()) {
+ // Always break after "template <...>" and leading annotations. This is only
+ // for cases where the entire line does not fit on a single line as a
+ // different LineFormatter would be used otherwise.
if (Previous.ClosesTemplateDeclaration)
return true;
+ if (Previous.is(TT_FunctionAnnotationRParen))
+ return true;
if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) &&
Current.isNot(TT_LeadingJavaAnnotation))
return true;
@@ -221,8 +226,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
if (startsSegmentOfBuilderTypeCall(Current) &&
(State.Stack.back().CallContinuation != 0 ||
- (State.Stack.back().BreakBeforeParameter &&
- State.Stack.back().ContainsUnwrappedBuilder)))
+ State.Stack.back().BreakBeforeParameter))
return true;
// The following could be precomputed as they do not depend on the state.
@@ -232,6 +236,10 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment))
return true;
+ if (Current.is(tok::lessless) && Previous.is(tok::identifier) &&
+ Previous.TokenText == "endl")
+ return true;
+
return false;
}
@@ -245,12 +253,18 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
(Current.Previous->Tok.getIdentifierInfo() == nullptr ||
Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
tok::pp_not_keyword))) {
- // FIXME: Is this correct?
- int WhitespaceLength = SourceMgr.getSpellingColumnNumber(
- State.NextToken->WhitespaceRange.getEnd()) -
- SourceMgr.getSpellingColumnNumber(
- State.NextToken->WhitespaceRange.getBegin());
- State.Column += WhitespaceLength;
+ unsigned EndColumn =
+ SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd());
+ if (Current.LastNewlineOffset != 0) {
+ // If there is a newline within this token, the final column will solely
+ // be determined by the current end column.
+ State.Column = EndColumn;
+ } else {
+ unsigned StartColumn =
+ SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin());
+ assert(EndColumn >= StartColumn);
+ State.Column += EndColumn - StartColumn;
+ }
moveStateToNextToken(State, DryRun, /*Newline=*/false);
return 0;
}
@@ -297,7 +311,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
else if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
State.Column + Spaces + Current.ColumnWidth)
State.Stack.back().ColonPos =
- State.Stack.back().Indent + Current.LongestObjCSelectorName;
+ std::max(State.FirstIndent + Style.ContinuationIndentWidth,
+ State.Stack.back().Indent) +
+ Current.LongestObjCSelectorName;
else
State.Stack.back().ColonPos = State.Column + Spaces + Current.ColumnWidth;
}
@@ -308,9 +324,12 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
State.Stack.back().Indent = State.Column + Spaces;
if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style))
State.Stack.back().NoLineBreak = true;
- if (startsSegmentOfBuilderTypeCall(Current))
+ if (startsSegmentOfBuilderTypeCall(Current) &&
+ State.Column > getNewLineColumn(State))
State.Stack.back().ContainsUnwrappedBuilder = true;
+ if (Current.is(TT_LambdaArrow))
+ State.Stack.back().NoLineBreak = true;
if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
(Previous.MatchingParen &&
(Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) {
@@ -359,7 +378,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
HasTrailingCall = Next && Next->isMemberAccess();
}
- if (HasTrailingCall &&
+ if (HasTrailingCall && State.Stack.size() > 1 &&
State.Stack[State.Stack.size() - 2].CallContinuation == 0)
State.Stack.back().LastSpace = State.Column;
}
@@ -406,7 +425,11 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
State.Stack.back().AlignColons = false;
} else {
State.Stack.back().ColonPos =
- State.Stack.back().Indent + NextNonComment->LongestObjCSelectorName;
+ (Style.IndentWrappedFunctionNames
+ ? std::max(State.Stack.back().Indent,
+ State.FirstIndent + Style.ContinuationIndentWidth)
+ : State.Stack.back().Indent) +
+ NextNonComment->LongestObjCSelectorName;
}
} else if (State.Stack.back().AlignColons &&
State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) {
@@ -468,8 +491,9 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
!PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
(PreviousNonComment->isNot(TT_TemplateCloser) ||
Current.NestingLevel != 0) &&
- !PreviousNonComment->isOneOf(TT_BinaryOperator, TT_JavaAnnotation,
- TT_LeadingJavaAnnotation) &&
+ !PreviousNonComment->isOneOf(
+ TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation) &&
Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope())
State.Stack.back().BreakBeforeParameter = true;
@@ -516,7 +540,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block)
return Current.NestingLevel == 0 ? State.FirstIndent
: State.Stack.back().Indent;
- if (Current.isOneOf(tok::r_brace, tok::r_square)) {
+ if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) {
if (Current.closesBlockTypeList(Style))
return State.Stack[State.Stack.size() - 2].NestedBlockIndent;
if (Current.MatchingParen &&
@@ -529,6 +553,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
return State.Stack.back().Indent;
if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
return State.StartOfStringLiteral;
+ if (NextNonComment->is(TT_ObjCStringLiteral) &&
+ State.StartOfStringLiteral != 0)
+ return State.StartOfStringLiteral - 1;
if (NextNonComment->is(tok::lessless) &&
State.Stack.back().FirstLessLess != 0)
return State.Stack.back().FirstLessLess;
@@ -546,8 +573,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
return State.Stack.back().VariablePos;
if ((PreviousNonComment &&
(PreviousNonComment->ClosesTemplateDeclaration ||
- PreviousNonComment->isOneOf(TT_AttributeParen, TT_JavaAnnotation,
- TT_LeadingJavaAnnotation))) ||
+ PreviousNonComment->isOneOf(
+ TT_AttributeParen, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation))) ||
(!Style.IndentWrappedFunctionNames &&
NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName)))
return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent);
@@ -555,7 +583,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
if (!State.Stack.back().ObjCSelectorNameFound) {
if (NextNonComment->LongestObjCSelectorName == 0)
return State.Stack.back().Indent;
- return State.Stack.back().Indent +
+ return (Style.IndentWrappedFunctionNames
+ ? std::max(State.Stack.back().Indent,
+ State.FirstIndent + Style.ContinuationIndentWidth)
+ : State.Stack.back().Indent) +
NextNonComment->LongestObjCSelectorName -
NextNonComment->ColumnWidth;
}
@@ -570,10 +601,16 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
return State.Stack.back().StartOfArraySubscripts;
return ContinuationIndent;
}
- if (NextNonComment->is(TT_StartOfName) ||
- Previous.isOneOf(tok::coloncolon, tok::equal)) {
+
+ // This ensures that we correctly format ObjC method calls without inputs,
+ // i.e. where the last element isn't a selector, like: [callee method];
+ if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 &&
+ NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr))
+ return State.Stack.back().Indent;
+
+ if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) ||
+ Previous.isOneOf(tok::coloncolon, tok::equal))
return ContinuationIndent;
- }
if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))
return ContinuationIndent;
@@ -621,7 +658,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
std::min(State.LowestLevelOnLine, Current.NestingLevel);
if (Current.isMemberAccess())
State.Stack.back().StartOfFunctionCall =
- Current.LastOperator ? 0 : State.Column + Current.ColumnWidth;
+ Current.LastOperator ? 0 : State.Column;
if (Current.is(TT_SelectorName))
State.Stack.back().ObjCSelectorNameFound = true;
if (Current.is(TT_CtorInitializerColon)) {
@@ -637,12 +674,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Stack.back().AvoidBinPacking = true;
State.Stack.back().BreakBeforeParameter = false;
}
-
- // In ObjC method declaration we align on the ":" of parameters, but we need
- // to ensure that we indent parameters on subsequent lines by at least our
- // continuation indent width.
- if (Current.is(TT_ObjCMethodSpecifier))
- State.Stack.back().Indent += Style.ContinuationIndentWidth;
+ if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
+ State.Stack.back().NestedBlockIndent =
+ State.Column + Current.ColumnWidth + 1;
// Insert scopes created by fake parenthesis.
const FormatToken *Previous = Current.getPreviousNonComment();
@@ -675,12 +709,13 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
moveStatePastScopeCloser(State);
moveStatePastFakeRParens(State);
- if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
+ if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
State.StartOfStringLiteral = State.Column;
- } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
- !Current.isStringLiteral()) {
+ if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
+ State.StartOfStringLiteral = State.Column + 1;
+ else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
+ !Current.isStringLiteral())
State.StartOfStringLiteral = 0;
- }
State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
@@ -712,7 +747,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
// 'return', assignments or opening <({[. The indentation for these cases
// is special cased.
bool SkipFirstExtraIndent =
- (Previous && (Previous->opensScope() || Previous->is(tok::kw_return) ||
+ (Previous && (Previous->opensScope() ||
+ Previous->isOneOf(tok::semi, tok::kw_return) ||
(Previous->getPrecedence() == prec::Assignment &&
Style.AlignOperands) ||
Previous->is(TT_ObjCMethodExpr)));
@@ -783,7 +819,6 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) {
for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) {
unsigned VariablePos = State.Stack.back().VariablePos;
- assert(State.Stack.size() > 1);
if (State.Stack.size() == 1) {
// Do not pop the last element.
break;
@@ -806,6 +841,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
unsigned NewIndent;
unsigned NewIndentLevel = State.Stack.back().IndentLevel;
+ unsigned LastSpace = State.Stack.back().LastSpace;
bool AvoidBinPacking;
bool BreakBeforeParameter = false;
if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
@@ -815,17 +851,28 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
++NewIndentLevel;
} else {
NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth;
- NewIndent = std::min(State.Column + 1, NewIndent);
}
const FormatToken *NextNoComment = Current.getNextNonComment();
AvoidBinPacking =
Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) ||
- Style.Language == FormatStyle::LK_Proto || !Style.BinPackParameters ||
+ Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments ||
(NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod));
} else {
NewIndent = Style.ContinuationIndentWidth +
std::max(State.Stack.back().LastSpace,
State.Stack.back().StartOfFunctionCall);
+
+ // Ensure that different brackets force relative alignment, e.g.:
+ // void SomeFunction(vector< // break
+ // int> v);
+ // FIXME: We likely want to do this for more combinations of brackets.
+ // Verify that it is wanted for ObjC, too.
+ if (Current.Tok.getKind() == tok::less &&
+ Current.ParentBracket == tok::l_paren) {
+ NewIndent = std::max(NewIndent, State.Stack.back().Indent);
+ LastSpace = std::max(LastSpace, State.Stack.back().Indent);
+ }
+
AvoidBinPacking =
(State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
(!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
@@ -833,19 +880,33 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
(Current.PackingKind == PPK_OnePerLine ||
(!BinPackInconclusiveFunctions &&
Current.PackingKind == PPK_Inconclusive)));
- // If this '[' opens an ObjC call, determine whether all parameters fit
- // into one line and put one per line if they don't.
- if (Current.is(TT_ObjCMethodExpr) && Style.ColumnLimit != 0 &&
- getLengthToMatchingParen(Current) + State.Column >
+ if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) {
+ if (Style.ColumnLimit) {
+ // If this '[' opens an ObjC call, determine whether all parameters fit
+ // into one line and put one per line if they don't.
+ if (getLengthToMatchingParen(Current) + State.Column >
getColumnLimit(State))
- BreakBeforeParameter = true;
+ BreakBeforeParameter = true;
+ } else {
+ // For ColumnLimit = 0, we have to figure out whether there is or has to
+ // be a line break within this call.
+ for (const FormatToken *Tok = &Current;
+ Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
+ if (Tok->MustBreakBefore ||
+ (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
+ BreakBeforeParameter = true;
+ break;
+ }
+ }
+ }
+ }
}
bool NoLineBreak = State.Stack.back().NoLineBreak ||
(Current.is(TT_TemplateOpener) &&
State.Stack.back().ContainsUnwrappedBuilder);
- unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent;
- State.Stack.push_back(ParenState(NewIndent, NewIndentLevel,
- State.Stack.back().LastSpace,
+ unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall,
+ State.Stack.back().NestedBlockIndent);
+ State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace,
AvoidBinPacking, NoLineBreak));
State.Stack.back().NestedBlockIndent = NestedBlockIndent;
State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
@@ -1082,8 +1143,9 @@ bool ContinuationIndenter::nextIsMultilineString(const LineState &State) {
if (Current.getNextNonComment() &&
Current.getNextNonComment()->isStringLiteral())
return true; // Implicit concatenation.
- if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
- Style.ColumnLimit)
+ if (Style.ColumnLimit != 0 &&
+ State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
+ Style.ColumnLimit)
return true; // String will be split.
return false;
}
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 2a4721f2b3b7d..10c68f9da6174 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -109,10 +109,8 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
}
};
-template <>
-struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
- static void enumeration(IO &IO,
- FormatStyle::PointerAlignmentStyle &Value) {
+template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
+ static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
@@ -144,8 +142,8 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("Language", Style.Language);
if (IO.outputting()) {
- StringRef StylesArray[] = { "LLVM", "Google", "Chromium",
- "Mozilla", "WebKit", "GNU" };
+ StringRef StylesArray[] = {"LLVM", "Google", "Chromium",
+ "Mozilla", "WebKit", "GNU"};
ArrayRef<StringRef> Styles(StylesArray);
for (size_t i = 0, e = Styles.size(); i < e; ++i) {
StringRef StyleName(Styles[i]);
@@ -176,6 +174,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
IO.mapOptional("AlignOperands", Style.AlignOperands);
IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
+ IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments);
IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
Style.AllowAllParametersOfDeclarationOnNextLine);
IO.mapOptional("AllowShortBlocksOnASingleLine",
@@ -273,7 +272,7 @@ template <> struct MappingTraits<FormatStyle> {
// will be used to get default values for missing keys.
// If the first element has no Language specified, it will be treated as the
// default one for the following elements.
-template <> struct DocumentListTraits<std::vector<FormatStyle> > {
+template <> struct DocumentListTraits<std::vector<FormatStyle>> {
static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
return Seq.size();
}
@@ -331,6 +330,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.AlignAfterOpenBracket = true;
LLVMStyle.AlignOperands = true;
LLVMStyle.AlignTrailingComments = true;
+ LLVMStyle.AlignConsecutiveAssignments = false;
LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
LLVMStyle.AllowShortBlocksOnASingleLine = false;
@@ -600,10 +600,10 @@ public:
FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
encoding::Encoding Encoding)
: FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
- Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
- Style(Style), IdentTable(getFormattingLangOpts(Style)),
- Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
- FormattingDisabled(false) {
+ LessStashed(false), Column(0), TrailingWhitespace(0),
+ SourceMgr(SourceMgr), ID(ID), Style(Style),
+ IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
+ Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
Lex->SetKeepWhitespaceMode(true);
@@ -619,7 +619,7 @@ public:
do {
Tokens.push_back(getNextToken());
tryMergePreviousTokens();
- if (Tokens.back()->NewlinesBefore > 0)
+ if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->Tok.isNot(tok::eof));
return Tokens;
@@ -633,32 +633,62 @@ private:
return;
if (tryMergeConflictMarkers())
return;
+ if (tryMergeLessLess())
+ return;
if (Style.Language == FormatStyle::LK_JavaScript) {
if (tryMergeJSRegexLiteral())
return;
if (tryMergeEscapeSequence())
return;
+ if (tryMergeTemplateString())
+ return;
- static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
- static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
- static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
- tok::greaterequal };
- static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater };
- // FIXME: We probably need to change token type to mimic operator with the
- // correct priority.
- if (tryMergeTokens(JSIdentity))
+ static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
+ static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
+ tok::equal};
+ static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
+ tok::greaterequal};
+ static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
+ // FIXME: Investigate what token type gives the correct operator priority.
+ if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
return;
- if (tryMergeTokens(JSNotIdentity))
+ if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
return;
- if (tryMergeTokens(JSShiftEqual))
+ if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
return;
- if (tryMergeTokens(JSRightArrow))
+ if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
return;
}
}
- bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
+ bool tryMergeLessLess() {
+ // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
+ if (Tokens.size() < 3)
+ return false;
+
+ bool FourthTokenIsLess = false;
+ if (Tokens.size() > 3)
+ FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
+
+ auto First = Tokens.end() - 3;
+ if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
+ First[0]->isNot(tok::less) || FourthTokenIsLess)
+ return false;
+
+ // Only merge if there currently is no whitespace between the two "<".
+ if (First[1]->WhitespaceRange.getBegin() !=
+ First[1]->WhitespaceRange.getEnd())
+ return false;
+
+ First[0]->Tok.setKind(tok::lessless);
+ First[0]->TokenText = "<<";
+ First[0]->ColumnWidth += 1;
+ Tokens.erase(Tokens.end() - 2);
+ return true;
+ }
+
+ bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
if (Tokens.size() < Kinds.size())
return false;
@@ -668,8 +698,9 @@ private:
return false;
unsigned AddLength = 0;
for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
- First[i]->WhitespaceRange.getEnd())
+ if (!First[i]->is(Kinds[i]) ||
+ First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
return false;
AddLength += First[i]->TokenText.size();
}
@@ -677,6 +708,7 @@ private:
First[0]->TokenText = StringRef(First[0]->TokenText.data(),
First[0]->TokenText.size() + AddLength);
First[0]->ColumnWidth += AddLength;
+ First[0]->Type = NewType;
return true;
}
@@ -720,7 +752,7 @@ private:
unsigned LastColumn = Tokens.back()->OriginalColumn;
for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
++TokenCount;
- if (I[0]->is(tok::slash) && I + 1 != E &&
+ if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E &&
(I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
tok::exclaim, tok::l_square, tok::colon, tok::comma,
tok::question, tok::kw_return) ||
@@ -745,6 +777,91 @@ private:
return false;
}
+ bool tryMergeTemplateString() {
+ if (Tokens.size() < 2)
+ return false;
+
+ FormatToken *EndBacktick = Tokens.back();
+ // Backticks get lexed as tok::unknown tokens. If a template string contains
+ // a comment start, it gets lexed as a tok::comment, or tok::unknown if
+ // unterminated.
+ if (!EndBacktick->isOneOf(tok::comment, tok::unknown))
+ return false;
+ size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
+ // Unknown token that's not actually a backtick, or a comment that doesn't
+ // contain a backtick.
+ if (CommentBacktickPos == StringRef::npos)
+ return false;
+
+ unsigned TokenCount = 0;
+ bool IsMultiline = false;
+ unsigned EndColumnInFirstLine =
+ EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
+ for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
+ ++TokenCount;
+ if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline)
+ IsMultiline = true;
+
+ // If there was a preceding template string, this must be the start of a
+ // template string, not the end.
+ if (I[0]->is(TT_TemplateString))
+ return false;
+
+ if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
+ // Keep track of the rhs offset of the last token to wrap across lines -
+ // it's the rhs offset of the first line of the template string, used to
+ // determine its width.
+ if (I[0]->IsMultiline)
+ EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
+ // If the token has newlines, the token before it (if it exists) is the
+ // rhs end of the previous line.
+ if (I[0]->NewlinesBefore > 0 && (I + 1 != E))
+ EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
+
+ continue;
+ }
+
+ Tokens.resize(Tokens.size() - TokenCount);
+ Tokens.back()->Type = TT_TemplateString;
+ const char *EndOffset =
+ EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
+ if (CommentBacktickPos != 0) {
+ // If the backtick was not the first character (e.g. in a comment),
+ // re-lex after the backtick position.
+ SourceLocation Loc = EndBacktick->Tok.getLocation();
+ resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
+ }
+ Tokens.back()->TokenText =
+ StringRef(Tokens.back()->TokenText.data(),
+ EndOffset - Tokens.back()->TokenText.data());
+
+ unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
+ if (EndOriginalColumn == 0) {
+ SourceLocation Loc = EndBacktick->Tok.getLocation();
+ EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
+ }
+ // If the ` is further down within the token (e.g. in a comment).
+ EndOriginalColumn += CommentBacktickPos;
+
+ if (IsMultiline) {
+ // ColumnWidth is from backtick to last token in line.
+ // LastLineColumnWidth is 0 to backtick.
+ // x = `some content
+ // until here`;
+ Tokens.back()->ColumnWidth =
+ EndColumnInFirstLine - Tokens.back()->OriginalColumn;
+ Tokens.back()->LastLineColumnWidth = EndOriginalColumn;
+ Tokens.back()->IsMultiline = true;
+ } else {
+ // Token simply spans from start to end, +1 for the ` itself.
+ Tokens.back()->ColumnWidth =
+ EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
+ }
+ return true;
+ }
+ return false;
+ }
+
bool tryMerge_TMacro() {
if (Tokens.size() < 4)
return false;
@@ -772,6 +889,8 @@ private:
String->OriginalColumn = Macro->OriginalColumn;
String->ColumnWidth = encoding::columnWidthWithTabs(
String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+ String->NewlinesBefore = Macro->NewlinesBefore;
+ String->HasUnescapedNewline = Macro->HasUnescapedNewline;
Tokens.pop_back();
Tokens.pop_back();
@@ -842,21 +961,33 @@ private:
return false;
}
+ FormatToken *getStashedToken() {
+ // Create a synthesized second '>' or '<' token.
+ Token Tok = FormatTok->Tok;
+ StringRef TokenText = FormatTok->TokenText;
+
+ unsigned OriginalColumn = FormatTok->OriginalColumn;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ FormatTok->Tok = Tok;
+ SourceLocation TokLocation =
+ FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
+ FormatTok->Tok.setLocation(TokLocation);
+ FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
+ FormatTok->TokenText = TokenText;
+ FormatTok->ColumnWidth = 1;
+ FormatTok->OriginalColumn = OriginalColumn + 1;
+
+ return FormatTok;
+ }
+
FormatToken *getNextToken() {
if (GreaterStashed) {
- // Create a synthesized second '>' token.
- // FIXME: Increment Column and set OriginalColumn.
- Token Greater = FormatTok->Tok;
- FormatTok = new (Allocator.Allocate()) FormatToken;
- FormatTok->Tok = Greater;
- SourceLocation GreaterLocation =
- FormatTok->Tok.getLocation().getLocWithOffset(1);
- FormatTok->WhitespaceRange =
- SourceRange(GreaterLocation, GreaterLocation);
- FormatTok->TokenText = ">";
- FormatTok->ColumnWidth = 1;
GreaterStashed = false;
- return FormatTok;
+ return getStashedToken();
+ }
+ if (LessStashed) {
+ LessStashed = false;
+ return getStashedToken();
}
FormatTok = new (Allocator.Allocate()) FormatToken;
@@ -869,20 +1000,32 @@ private:
// Consume and record whitespace until we find a significant token.
unsigned WhitespaceLength = TrailingWhitespace;
while (FormatTok->Tok.is(tok::unknown)) {
- for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
- switch (FormatTok->TokenText[i]) {
+ StringRef Text = FormatTok->TokenText;
+ auto EscapesNewline = [&](int pos) {
+ // A '\r' here is just part of '\r\n'. Skip it.
+ if (pos >= 0 && Text[pos] == '\r')
+ --pos;
+ // See whether there is an odd number of '\' before this.
+ unsigned count = 0;
+ for (; pos >= 0; --pos, ++count)
+ if (Text[pos] != '\\')
+ break;
+ return count & 1;
+ };
+ // FIXME: This miscounts tok::unknown tokens that are not just
+ // whitespace, e.g. a '`' character.
+ for (int i = 0, e = Text.size(); i != e; ++i) {
+ switch (Text[i]) {
case '\n':
++FormatTok->NewlinesBefore;
- // FIXME: This is technically incorrect, as it could also
- // be a literal backslash at the end of the line.
- if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
- (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
- FormatTok->TokenText[i - 2] != '\\')))
- FormatTok->HasUnescapedNewline = true;
+ FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
Column = 0;
break;
case '\r':
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
case '\f':
case '\v':
Column = 0;
@@ -894,8 +1037,7 @@ private:
Column += Style.TabWidth - Column % Style.TabWidth;
break;
case '\\':
- if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
- FormatTok->TokenText[i + 1] != '\n'))
+ if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
FormatTok->Type = TT_ImplicitStringLiteral;
break;
default:
@@ -920,6 +1062,7 @@ private:
FormatTok->TokenText[1] == '\n') {
++FormatTok->NewlinesBefore;
WhitespaceLength += 2;
+ FormatTok->LastNewlineOffset = 2;
Column = 0;
FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
@@ -948,6 +1091,10 @@ private:
FormatTok->Tok.setKind(tok::greater);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
GreaterStashed = true;
+ } else if (FormatTok->Tok.is(tok::lessless)) {
+ FormatTok->Tok.setKind(tok::less);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ LessStashed = true;
}
// Now FormatTok is the next non-whitespace token.
@@ -975,16 +1122,16 @@ private:
Column = FormatTok->LastLineColumnWidth;
}
- FormatTok->IsForEachMacro =
- std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
- FormatTok->Tok.getIdentifierInfo());
+ if (std::find(ForEachMacros.begin(), ForEachMacros.end(),
+ FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end())
+ FormatTok->Type = TT_ForEachMacro;
return FormatTok;
}
FormatToken *FormatTok;
bool IsFirstToken;
- bool GreaterStashed;
+ bool GreaterStashed, LessStashed;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
@@ -1072,13 +1219,13 @@ public:
<< "\n");
}
- tooling::Replacements format() {
+ tooling::Replacements format(bool *IncompleteFormat) {
tooling::Replacements Result;
FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
*this);
- bool StructuralError = Parser.parse();
+ Parser.parse();
assert(UnwrappedLines.rbegin()->empty());
for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
++Run) {
@@ -1088,7 +1235,7 @@ public:
AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
}
tooling::Replacements RunResult =
- format(AnnotatedLines, StructuralError, Tokens);
+ format(AnnotatedLines, Tokens, IncompleteFormat);
DEBUG({
llvm::dbgs() << "Replacements for run " << Run << ":\n";
for (tooling::Replacements::iterator I = RunResult.begin(),
@@ -1107,7 +1254,7 @@ public:
}
tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- bool StructuralError, FormatTokenLexer &Tokens) {
+ FormatTokenLexer &Tokens, bool *IncompleteFormat) {
TokenAnnotator Annotator(Style, Tokens.getKeywords());
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.annotate(*AnnotatedLines[i]);
@@ -1122,8 +1269,9 @@ public:
ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
Whitespaces, Encoding,
BinPackInconclusiveFunctions);
- UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
- Formatter.format(AnnotatedLines, /*DryRun=*/false);
+ UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
+ IncompleteFormat)
+ .format(AnnotatedLines);
return Whitespaces.generateReplacements();
}
@@ -1340,27 +1488,20 @@ private:
} // end anonymous namespace
-tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
- SourceManager &SourceMgr,
- ArrayRef<CharSourceRange> Ranges) {
- if (Style.DisableFormat)
- return tooling::Replacements();
- return reformat(Style, SourceMgr,
- SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
-}
-
tooling::Replacements reformat(const FormatStyle &Style,
SourceManager &SourceMgr, FileID ID,
- ArrayRef<CharSourceRange> Ranges) {
+ ArrayRef<CharSourceRange> Ranges,
+ bool *IncompleteFormat) {
if (Style.DisableFormat)
return tooling::Replacements();
Formatter formatter(Style, SourceMgr, ID, Ranges);
- return formatter.format();
+ return formatter.format(IncompleteFormat);
}
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
- StringRef FileName) {
+ StringRef FileName,
+ bool *IncompleteFormat) {
if (Style.DisableFormat)
return tooling::Replacements();
@@ -1383,7 +1524,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
SourceLocation End = Start.getLocWithOffset(Range.getLength());
CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
}
- return reformat(Style, SourceMgr, ID, CharRanges);
+ return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
}
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -1392,12 +1533,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
LangOpts.LineComment = 1;
- bool AlternativeOperators = Style.Language != FormatStyle::LK_JavaScript &&
- Style.Language != FormatStyle::LK_Java;
+ bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
LangOpts.Bool = 1;
LangOpts.ObjC1 = 1;
LangOpts.ObjC2 = 1;
+ LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
return LangOpts;
}
@@ -1415,7 +1556,8 @@ const char *StyleOptionHelpDescription =
static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
if (FileName.endswith(".java")) {
return FormatStyle::LK_Java;
- } else if (FileName.endswith_lower(".js")) {
+ } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) {
+ // JavaScript or TypeScript.
return FormatStyle::LK_JavaScript;
} else if (FileName.endswith_lower(".proto") ||
FileName.endswith_lower(".protodevel")) {
diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp
index badb3a39c82c2..88678ca1abe1a 100644
--- a/lib/Format/FormatToken.cpp
+++ b/lib/Format/FormatToken.cpp
@@ -18,6 +18,7 @@
#include "clang/Format/Format.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
+#include <climits>
namespace clang {
namespace format {
@@ -59,12 +60,13 @@ void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
unsigned CommaSeparatedList::formatAfterToken(LineState &State,
ContinuationIndenter *Indenter,
bool DryRun) {
- if (!State.NextToken->Previous || !State.NextToken->Previous->Previous)
+ if (State.NextToken == nullptr || !State.NextToken->Previous)
return 0;
// Ensure that we start on the opening brace.
- const FormatToken *LBrace = State.NextToken->Previous->Previous;
- if (LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block ||
+ const FormatToken *LBrace =
+ State.NextToken->Previous->getPreviousNonComment();
+ if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block ||
LBrace->Type == TT_DictLiteral ||
LBrace->Next->Type == TT_DesignatedInitializerPeriod)
return 0;
@@ -132,9 +134,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
return;
// In C++11 braced list style, we should not format in columns unless they
- // have many items (20 or more) or we allow bin-packing of function
- // parameters.
- if (Style.Cpp11BracedListStyle && !Style.BinPackParameters &&
+ // have many items (20 or more) or we allow bin-packing of function call
+ // arguments.
+ if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
Commas.size() < 19)
return;
@@ -143,19 +145,21 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
return;
FormatToken *ItemBegin = Token->Next;
+ while (ItemBegin->isTrailingComment())
+ ItemBegin = ItemBegin->Next;
SmallVector<bool, 8> MustBreakBeforeItem;
// The lengths of an item if it is put at the end of the line. This includes
// trailing comments which are otherwise ignored for column alignment.
SmallVector<unsigned, 8> EndOfLineItemLength;
- unsigned MinItemLength = Style.ColumnLimit;
- unsigned MaxItemLength = 0;
-
+ bool HasSeparatingComment = false;
for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
// Skip comments on their own line.
- while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
ItemBegin = ItemBegin->Next;
+ HasSeparatingComment = i > 0;
+ }
MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
if (ItemBegin->is(tok::l_brace))
@@ -178,8 +182,6 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
ItemEnd = Commas[i];
// The comma is counted as part of the item when calculating the length.
ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
- MinItemLength = std::min(MinItemLength, ItemLengths.back());
- MaxItemLength = std::max(MaxItemLength, ItemLengths.back());
// Consume trailing comments so they are included in EndOfLineItemLength.
if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
@@ -194,12 +196,9 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
ItemBegin = ItemEnd->Next;
}
- // If this doesn't have a nested list, we require at least 6 elements in order
- // create a column layout. If it has a nested list, column layout ensures one
- // list element per line. If the difference between the shortest and longest
- // element is too large, column layout would create too much whitespace.
- if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0 ||
- MaxItemLength - MinItemLength > 10)
+ // Don't use column layout for nested lists, lists with few elements and in
+ // presence of separating comments.
+ if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment)
return;
// We can never place more than ColumnLimit / 3 items in a row (because of the
@@ -208,6 +207,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
ColumnFormat Format;
Format.Columns = Columns;
Format.ColumnSizes.resize(Columns);
+ std::vector<unsigned> MinSizeInColumn(Columns, UINT_MAX);
Format.LineCount = 1;
bool HasRowWithSufficientColumns = false;
unsigned Column = 0;
@@ -219,9 +219,10 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
}
if (Column == Columns - 1)
HasRowWithSufficientColumns = true;
- unsigned length =
+ unsigned Length =
(Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
- Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], length);
+ Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
+ MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
++Column;
}
// If all rows are terminated early (e.g. by trailing comments), we don't
@@ -229,9 +230,19 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
if (!HasRowWithSufficientColumns)
break;
Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
- for (unsigned i = 0; i < Columns; ++i) {
+
+ for (unsigned i = 0; i < Columns; ++i)
Format.TotalWidth += Format.ColumnSizes[i];
- }
+
+ // Don't use this Format, if the difference between the longest and shortest
+ // element in a column exceeds a threshold to avoid excessive spaces.
+ if ([&] {
+ for (unsigned i = 0; i < Columns - 1; ++i)
+ if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
+ return true;
+ return false;
+ }())
+ continue;
// Ignore layouts that are bound to violate the column limit.
if (Format.TotalWidth > Style.ColumnLimit)
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index 4811e02dd2282..ec0fdf4aa813b 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -41,13 +41,19 @@ enum TokenType {
TT_CtorInitializerComma,
TT_DesignatedInitializerPeriod,
TT_DictLiteral,
+ TT_ForEachMacro,
+ TT_FunctionAnnotationRParen,
TT_FunctionDeclarationName,
TT_FunctionLBrace,
TT_FunctionTypeLParen,
TT_ImplicitStringLiteral,
TT_InheritanceColon,
+ TT_InlineASMBrace,
TT_InlineASMColon,
TT_JavaAnnotation,
+ TT_JsFatArrow,
+ TT_JsTypeColon,
+ TT_JsTypeOptionalQuestion,
TT_LambdaArrow,
TT_LambdaLSquare,
TT_LeadingJavaAnnotation,
@@ -59,6 +65,7 @@ enum TokenType {
TT_ObjCMethodExpr,
TT_ObjCMethodSpecifier,
TT_ObjCProperty,
+ TT_ObjCStringLiteral,
TT_OverloadedOperator,
TT_OverloadedOperatorLParen,
TT_PointerOrReference,
@@ -69,6 +76,7 @@ enum TokenType {
TT_StartOfName,
TT_TemplateCloser,
TT_TemplateOpener,
+ TT_TemplateString,
TT_TrailingAnnotation,
TT_TrailingReturnArrow,
TT_TrailingUnaryOperator,
@@ -102,21 +110,7 @@ class AnnotatedLine;
/// \brief A wrapper around a \c Token storing information about the
/// whitespace characters preceding it.
struct FormatToken {
- FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
- ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
- IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
- BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
- CanBreakBefore(false), ClosesTemplateDeclaration(false),
- ParameterCount(0), BlockParameterCount(0),
- PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
- BindingStrength(0), NestingLevel(0), SplitPenalty(0),
- LongestObjCSelectorName(0), FakeRParens(0),
- StartsBinaryExpression(false), EndsBinaryExpression(false),
- OperatorIndex(0), LastOperator(false),
- PartOfMultiVariableDeclStmt(false), IsForEachMacro(false),
- MatchingParen(nullptr), Previous(nullptr), Next(nullptr),
- Decision(FD_Unformatted), Finalized(false) {}
+ FormatToken() {}
/// \brief The \c Token.
Token Tok;
@@ -125,48 +119,39 @@ struct FormatToken {
///
/// This can be used to determine what the user wrote in the original code
/// and thereby e.g. leave an empty line between two function definitions.
- unsigned NewlinesBefore;
+ unsigned NewlinesBefore = 0;
/// \brief Whether there is at least one unescaped newline before the \c
/// Token.
- bool HasUnescapedNewline;
+ bool HasUnescapedNewline = false;
/// \brief The range of the whitespace immediately preceding the \c Token.
SourceRange WhitespaceRange;
/// \brief The offset just past the last '\n' in this token's leading
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
- unsigned LastNewlineOffset;
+ unsigned LastNewlineOffset = 0;
/// \brief The width of the non-whitespace parts of the token (or its first
/// line for multi-line tokens) in columns.
/// We need this to correctly measure number of columns a token spans.
- unsigned ColumnWidth;
+ unsigned ColumnWidth = 0;
/// \brief Contains the width in columns of the last line of a multi-line
/// token.
- unsigned LastLineColumnWidth;
+ unsigned LastLineColumnWidth = 0;
/// \brief Whether the token text contains newlines (escaped or not).
- bool IsMultiline;
+ bool IsMultiline = false;
/// \brief Indicates that this is the first token.
- bool IsFirst;
+ bool IsFirst = false;
/// \brief Whether there must be a line break before this token.
///
/// This happens for example when a preprocessor directive ended directly
/// before the token.
- bool MustBreakBefore;
-
- /// \brief Returns actual token start location without leading escaped
- /// newlines and whitespace.
- ///
- /// This can be different to Tok.getLocation(), which includes leading escaped
- /// newlines.
- SourceLocation getStartOfNonWhitespace() const {
- return WhitespaceRange.getEnd();
- }
+ bool MustBreakBefore = false;
/// \brief The raw text of the token.
///
@@ -175,69 +160,74 @@ struct FormatToken {
StringRef TokenText;
/// \brief Set to \c true if this token is an unterminated literal.
- bool IsUnterminatedLiteral;
+ bool IsUnterminatedLiteral = 0;
/// \brief Contains the kind of block if this token is a brace.
- BraceBlockKind BlockKind;
+ BraceBlockKind BlockKind = BK_Unknown;
- TokenType Type;
+ TokenType Type = TT_Unknown;
/// \brief The number of spaces that should be inserted before this token.
- unsigned SpacesRequiredBefore;
+ unsigned SpacesRequiredBefore = 0;
/// \brief \c true if it is allowed to break before this token.
- bool CanBreakBefore;
+ bool CanBreakBefore = false;
- bool ClosesTemplateDeclaration;
+ /// \brief \c true if this is the ">" of "template<..>".
+ bool ClosesTemplateDeclaration = false;
/// \brief Number of parameters, if this is "(", "[" or "<".
///
/// This is initialized to 1 as we don't need to distinguish functions with
/// 0 parameters from functions with 1 parameter. Thus, we can simply count
/// the number of commas.
- unsigned ParameterCount;
+ unsigned ParameterCount = 0;
/// \brief Number of parameters that are nested blocks,
/// if this is "(", "[" or "<".
- unsigned BlockParameterCount;
+ unsigned BlockParameterCount = 0;
+
+ /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of
+ /// the surrounding bracket.
+ tok::TokenKind ParentBracket = tok::unknown;
/// \brief A token can have a special role that can carry extra information
/// about the token's formatting.
std::unique_ptr<TokenRole> Role;
/// \brief If this is an opening parenthesis, how are the parameters packed?
- ParameterPackingKind PackingKind;
+ ParameterPackingKind PackingKind = PPK_Inconclusive;
/// \brief The total length of the unwrapped line up to and including this
/// token.
- unsigned TotalLength;
+ unsigned TotalLength = 0;
/// \brief The original 0-based column of this token, including expanded tabs.
/// The configured TabWidth is used as tab width.
- unsigned OriginalColumn;
+ unsigned OriginalColumn = 0;
/// \brief The length of following tokens until the next natural split point,
/// or the next token that can be broken.
- unsigned UnbreakableTailLength;
+ unsigned UnbreakableTailLength = 0;
// FIXME: Come up with a 'cleaner' concept.
/// \brief The binding strength of a token. This is a combined value of
/// operator precedence, parenthesis nesting, etc.
- unsigned BindingStrength;
+ unsigned BindingStrength = 0;
/// \brief The nesting level of this token, i.e. the number of surrounding (),
/// [], {} or <>.
- unsigned NestingLevel;
+ unsigned NestingLevel = 0;
/// \brief Penalty for inserting a line break before this token.
- unsigned SplitPenalty;
+ unsigned SplitPenalty = 0;
/// \brief If this is the first ObjC selector name in an ObjC method
/// definition or call, this contains the length of the longest name.
///
/// This being set to 0 means that the selectors should not be colon-aligned,
/// e.g. because several of them are block-type.
- unsigned LongestObjCSelectorName;
+ unsigned LongestObjCSelectorName = 0;
/// \brief Stores the number of required fake parentheses and the
/// corresponding operator precedence.
@@ -246,29 +236,47 @@ struct FormatToken {
/// reverse order, i.e. inner fake parenthesis first.
SmallVector<prec::Level, 4> FakeLParens;
/// \brief Insert this many fake ) after this token for correct indentation.
- unsigned FakeRParens;
+ unsigned FakeRParens = 0;
/// \brief \c true if this token starts a binary expression, i.e. has at least
/// one fake l_paren with a precedence greater than prec::Unknown.
- bool StartsBinaryExpression;
+ bool StartsBinaryExpression = false;
/// \brief \c true if this token ends a binary expression.
- bool EndsBinaryExpression;
+ bool EndsBinaryExpression = false;
/// \brief If this is an operator (or "."/"->") in a sequence of operators
/// with the same precedence, contains the 0-based operator index.
- unsigned OperatorIndex;
+ unsigned OperatorIndex = 0;
/// \brief Is this the last operator (or "."/"->") in a sequence of operators
/// with the same precedence?
- bool LastOperator;
+ bool LastOperator = false;
/// \brief Is this token part of a \c DeclStmt defining multiple variables?
///
/// Only set if \c Type == \c TT_StartOfName.
- bool PartOfMultiVariableDeclStmt;
+ bool PartOfMultiVariableDeclStmt = false;
+
+ /// \brief If this is a bracket, this points to the matching one.
+ FormatToken *MatchingParen = nullptr;
+
+ /// \brief The previous token in the unwrapped line.
+ FormatToken *Previous = nullptr;
- /// \brief Is this a foreach macro?
- bool IsForEachMacro;
+ /// \brief The next token in the unwrapped line.
+ FormatToken *Next = nullptr;
+
+ /// \brief If this token starts a block, this contains all the unwrapped lines
+ /// in it.
+ SmallVector<AnnotatedLine *, 1> Children;
+
+ /// \brief Stores the formatting decision for the token once it was made.
+ FormatDecision Decision = FD_Unformatted;
+
+ /// \brief If \c true, this token has been fully formatted (indented and
+ /// potentially re-formatted inside), and we do not allow further formatting
+ /// changes.
+ bool Finalized = false;
bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
bool is(TokenType TT) const { return Type == TT; }
@@ -278,27 +286,10 @@ struct FormatToken {
template <typename A, typename B> bool isOneOf(A K1, B K2) const {
return is(K1) || is(K2);
}
- template <typename A, typename B, typename C>
- bool isOneOf(A K1, B K2, C K3) const {
- return is(K1) || is(K2) || is(K3);
- }
- template <typename A, typename B, typename C, typename D>
- bool isOneOf(A K1, B K2, C K3, D K4) const {
- return is(K1) || is(K2) || is(K3) || is(K4);
+ template <typename A, typename B, typename... Ts>
+ bool isOneOf(A K1, B K2, Ts... Ks) const {
+ return is(K1) || isOneOf(K2, Ks...);
}
- template <typename A, typename B, typename C, typename D, typename E>
- bool isOneOf(A K1, B K2, C K3, D K4, E K5) const {
- return is(K1) || is(K2) || is(K3) || is(K4) || is(K5);
- }
- template <typename T>
- bool isOneOf(T K1, T K2, T K3, T K4, T K5, T K6, T K7 = tok::NUM_TOKENS,
- T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS,
- T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS,
- T K12 = tok::NUM_TOKENS) const {
- return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
- is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
- }
-
template <typename T> bool isNot(T Kind) const { return !is(Kind); }
bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
@@ -336,7 +327,8 @@ struct FormatToken {
/// \brief Returns \c true if this is a "." or "->" accessing a member.
bool isMemberAccess() const {
return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
- !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow);
+ !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
+ TT_LambdaArrow);
}
bool isUnaryOperator() const {
@@ -385,6 +377,15 @@ struct FormatToken {
}
}
+ /// \brief Returns actual token start location without leading escaped
+ /// newlines and whitespace.
+ ///
+ /// This can be different to Tok.getLocation(), which includes leading escaped
+ /// newlines.
+ SourceLocation getStartOfNonWhitespace() const {
+ return WhitespaceRange.getEnd();
+ }
+
prec::Level getPrecedence() const {
return getBinOpPrecedence(Tok.getKind(), true, true);
}
@@ -419,25 +420,10 @@ struct FormatToken {
return MatchingParen && MatchingParen->opensBlockTypeList(Style);
}
- FormatToken *MatchingParen;
-
- FormatToken *Previous;
- FormatToken *Next;
-
- SmallVector<AnnotatedLine *, 1> Children;
-
- /// \brief Stores the formatting decision for the token once it was made.
- FormatDecision Decision;
-
- /// \brief If \c true, this token has been fully formatted (indented and
- /// potentially re-formatted inside), and we do not allow further formatting
- /// changes.
- bool Finalized;
-
private:
// Disallow copying.
- FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION;
- void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
+ FormatToken(const FormatToken &) = delete;
+ void operator=(const FormatToken &) = delete;
};
class ContinuationIndenter;
@@ -543,6 +529,7 @@ struct AdditionalKeywords {
kw_finally = &IdentTable.get("finally");
kw_function = &IdentTable.get("function");
+ kw_import = &IdentTable.get("import");
kw_var = &IdentTable.get("var");
kw_abstract = &IdentTable.get("abstract");
@@ -555,24 +542,33 @@ struct AdditionalKeywords {
kw_package = &IdentTable.get("package");
kw_synchronized = &IdentTable.get("synchronized");
kw_throws = &IdentTable.get("throws");
+ kw___except = &IdentTable.get("__except");
+
+ kw_mark = &IdentTable.get("mark");
kw_option = &IdentTable.get("option");
kw_optional = &IdentTable.get("optional");
kw_repeated = &IdentTable.get("repeated");
kw_required = &IdentTable.get("required");
kw_returns = &IdentTable.get("returns");
+
+ kw_signals = &IdentTable.get("signals");
+ kw_slots = &IdentTable.get("slots");
+ kw_qslots = &IdentTable.get("Q_SLOTS");
}
- // ObjC context sensitive keywords.
+ // Context sensitive keywords.
IdentifierInfo *kw_in;
IdentifierInfo *kw_CF_ENUM;
IdentifierInfo *kw_CF_OPTIONS;
IdentifierInfo *kw_NS_ENUM;
IdentifierInfo *kw_NS_OPTIONS;
+ IdentifierInfo *kw___except;
// JavaScript keywords.
IdentifierInfo *kw_finally;
IdentifierInfo *kw_function;
+ IdentifierInfo *kw_import;
IdentifierInfo *kw_var;
// Java keywords.
@@ -587,12 +583,20 @@ struct AdditionalKeywords {
IdentifierInfo *kw_synchronized;
IdentifierInfo *kw_throws;
+ // Pragma keywords.
+ IdentifierInfo *kw_mark;
+
// Proto keywords.
IdentifierInfo *kw_option;
IdentifierInfo *kw_optional;
IdentifierInfo *kw_repeated;
IdentifierInfo *kw_required;
IdentifierInfo *kw_returns;
+
+ // Qt keywords.
+ IdentifierInfo *kw_signals;
+ IdentifierInfo *kw_slots;
+ IdentifierInfo *kw_qslots;
};
} // namespace format
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 4ba3f91969776..98f5709b90621 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -15,6 +15,7 @@
#include "TokenAnnotator.h"
#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "format-token-annotator"
@@ -43,8 +44,14 @@ private:
bool parseAngle() {
if (!CurrentToken)
return false;
- ScopedContextCreator ContextCreator(*this, tok::less, 10);
FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
+ ScopedContextCreator ContextCreator(*this, tok::less, 10);
+
+ // If this angle is in the context of an expression, we need to be more
+ // hesitant to detect it as opening template parameters.
+ bool InExprContext = Contexts.back().IsExpression;
+
Contexts.back().IsExpression = false;
// If there's a template keyword before the opening angle bracket, this is a
// template parameter, not an argument.
@@ -68,8 +75,8 @@ private:
next();
continue;
}
- if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
- tok::colon, tok::question))
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
+ (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
return false;
// If a && or || is found and interpreted as a binary operator, this set
// of angles is likely part of something like "a < b && c > d". If the
@@ -92,6 +99,8 @@ private:
bool parseParens(bool LookForDecls = false) {
if (!CurrentToken)
return false;
+ FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
// FIXME: This is a bit of a hack. Do better.
@@ -99,7 +108,6 @@ private:
Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
bool StartsObjCMethodExpr = false;
- FormatToken *Left = CurrentToken->Previous;
if (CurrentToken->is(tok::caret)) {
// (^ can start a block type.
Left->Type = TT_ObjCBlockLParen;
@@ -117,22 +125,22 @@ private:
Left->Previous->is(TT_BinaryOperator))) {
// static_assert, if and while usually contain expressions.
Contexts.back().IsExpression = true;
- } else if (Line.InPPDirective &&
- (!Left->Previous ||
- !Left->Previous->isOneOf(tok::identifier,
- TT_OverloadedOperator))) {
- Contexts.back().IsExpression = true;
} else if (Left->Previous && Left->Previous->is(tok::r_square) &&
Left->Previous->MatchingParen &&
Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
// This is a parameter list of a lambda expression.
Contexts.back().IsExpression = false;
+ } else if (Line.InPPDirective &&
+ (!Left->Previous ||
+ !Left->Previous->isOneOf(tok::identifier,
+ TT_OverloadedOperator))) {
+ Contexts.back().IsExpression = true;
} else if (Contexts[Contexts.size() - 2].CaretFound) {
// This is the parameter list of an ObjC block.
Contexts.back().IsExpression = false;
} else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
Left->Type = TT_AttributeParen;
- } else if (Left->Previous && Left->Previous->IsForEachMacro) {
+ } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
// The first argument to a foreach macro is a declaration.
Contexts.back().IsForEachMacro = true;
Contexts.back().IsExpression = false;
@@ -149,6 +157,8 @@ private:
bool MightBeFunctionType = CurrentToken->is(tok::star);
bool HasMultipleLines = false;
bool HasMultipleParametersOnALine = false;
+ bool MightBeObjCForRangeLoop =
+ Left->Previous && Left->Previous->is(tok::kw_for);
while (CurrentToken) {
// LookForDecls is set when "if (" has been seen. Check for
// 'identifier' '*' 'identifier' followed by not '=' -- this
@@ -210,7 +220,8 @@ private:
}
if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
return false;
- else if (CurrentToken->is(tok::l_brace))
+
+ if (CurrentToken->is(tok::l_brace))
Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
!CurrentToken->Next->HasUnescapedNewline &&
@@ -219,6 +230,15 @@ private:
if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
CurrentToken->isSimpleTypeSpecifier())
Contexts.back().IsExpression = false;
+ if (CurrentToken->isOneOf(tok::semi, tok::colon))
+ MightBeObjCForRangeLoop = false;
+ if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
+ CurrentToken->Type = TT_ObjCForIn;
+ // When we discover a 'new', we set CanBeExpression to 'false' in order to
+ // parse the type correctly. Reset that after a comma.
+ if (CurrentToken->is(tok::comma))
+ Contexts.back().CanBeExpression = true;
+
FormatToken *Tok = CurrentToken;
if (!consumeToken())
return false;
@@ -237,6 +257,7 @@ private:
// ')' or ']'), it could be the start of an Objective-C method
// expression, or it could be the start of an Objective-C array literal.
FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
FormatToken *Parent = Left->getPreviousNonComment();
bool StartsObjCMethodExpr =
Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
@@ -316,6 +337,7 @@ private:
bool parseBrace() {
if (CurrentToken) {
FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
if (Contexts.back().CaretFound)
Left->Type = TT_ObjCBlockLBrace;
@@ -342,7 +364,8 @@ private:
Style.Language == FormatStyle::LK_Proto) &&
Previous->is(tok::identifier))
Previous->Type = TT_SelectorName;
- if (CurrentToken->is(tok::colon))
+ if (CurrentToken->is(tok::colon) ||
+ Style.Language == FormatStyle::LK_JavaScript)
Left->Type = TT_DictLiteral;
}
if (!consumeToken())
@@ -408,10 +431,18 @@ private:
if (!Tok->Previous)
return false;
// Colons from ?: are handled in parseConditional().
- if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 &&
- Line.First->isNot(tok::kw_case)) {
- Tok->Type = TT_CtorInitializerColon;
- } else if (Contexts.back().ColonIsDictLiteral) {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
+ (Contexts.size() == 1 && // switch/case labels
+ !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
+ Contexts.back().ContextKind == tok::l_paren || // function params
+ Contexts.back().ContextKind == tok::l_square || // array type
+ Line.MustBeDeclaration) { // method/property declaration
+ Tok->Type = TT_JsTypeColon;
+ break;
+ }
+ }
+ if (Contexts.back().ColonIsDictLiteral) {
Tok->Type = TT_DictLiteral;
} else if (Contexts.back().ColonIsObjCMethodExpr ||
Line.First->is(TT_ObjCMethodSpecifier)) {
@@ -429,7 +460,10 @@ private:
Tok->Type = TT_BitFieldColon;
} else if (Contexts.size() == 1 &&
!Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
- Tok->Type = TT_InheritanceColon;
+ if (Tok->Previous->is(tok::r_paren))
+ Tok->Type = TT_CtorInitializerColon;
+ else
+ Tok->Type = TT_InheritanceColon;
} else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
// This handles a special macro in ObjC code where selectors including
@@ -471,13 +505,15 @@ private:
return false;
break;
case tok::less:
- if ((!Tok->Previous ||
+ if (!NonTemplateLess.count(Tok) &&
+ (!Tok->Previous ||
(!Tok->Previous->Tok.isLiteral() &&
!(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) &&
parseAngle()) {
Tok->Type = TT_TemplateOpener;
} else {
Tok->Type = TT_BinaryOperator;
+ NonTemplateLess.insert(Tok);
CurrentToken = Tok;
next();
}
@@ -509,21 +545,34 @@ private:
}
break;
case tok::question:
+ if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
+ Tok->Next->isOneOf(tok::semi, tok::colon, tok::r_paren,
+ tok::r_brace)) {
+ // Question marks before semicolons, colons, etc. indicate optional
+ // types (fields, parameters), e.g.
+ // function(x?: string, y?) {...}
+ // class X { y?; }
+ Tok->Type = TT_JsTypeOptionalQuestion;
+ break;
+ }
+ // Declarations cannot be conditional expressions, this can only be part
+ // of a type declaration.
+ if (Line.MustBeDeclaration &&
+ Style.Language == FormatStyle::LK_JavaScript)
+ break;
parseConditional();
break;
case tok::kw_template:
parseTemplateDeclaration();
break;
- case tok::identifier:
- if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in) &&
- Tok->Previous->isNot(tok::colon))
- Tok->Type = TT_ObjCForIn;
- break;
case tok::comma:
- if (Contexts.back().FirstStartOfName)
- Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
if (Contexts.back().InCtorInitializer)
Tok->Type = TT_CtorInitializerComma;
+ else if (Contexts.back().FirstStartOfName &&
+ (Contexts.size() == 1 || Line.First->is(tok::kw_for))) {
+ Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
+ Line.IsMultiVariableDeclStmt = true;
+ }
if (Contexts.back().IsForEachMacro)
Contexts.back().IsExpression = true;
break;
@@ -557,11 +606,14 @@ private:
void parsePragma() {
next(); // Consume "pragma".
- if (CurrentToken && CurrentToken->TokenText == "mark") {
+ if (CurrentToken &&
+ CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
+ bool IsMark = CurrentToken->is(Keywords.kw_mark);
next(); // Consume "mark".
next(); // Consume first token (so we fix leading whitespace).
while (CurrentToken) {
- CurrentToken->Type = TT_ImplicitStringLiteral;
+ if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
+ CurrentToken->Type = TT_ImplicitStringLiteral;
next();
}
}
@@ -582,6 +634,7 @@ private:
return Type;
switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_include:
+ case tok::pp_include_next:
case tok::pp_import:
next();
parseIncludeDirective();
@@ -609,9 +662,9 @@ private:
public:
LineType parseLine() {
- if (CurrentToken->is(tok::hash)) {
+ NonTemplateLess.clear();
+ if (CurrentToken->is(tok::hash))
return parsePreprocessorDirective();
- }
// Directly allow to 'import <string-literal>' to support protocol buffer
// definitions (code.google.com/p/protobuf) or missing "#" (either way we
@@ -635,6 +688,15 @@ public:
return LT_ImportStatement;
}
+ // In .proto files, top-level options are very similar to import statements
+ // and should not be line-wrapped.
+ if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
+ CurrentToken->is(Keywords.kw_option)) {
+ next();
+ if (CurrentToken && CurrentToken->is(tok::identifier))
+ return LT_ImportStatement;
+ }
+
bool KeywordVirtualFound = false;
bool ImportStatement = false;
while (CurrentToken) {
@@ -678,11 +740,13 @@ private:
// Reset token type in case we have already looked at it and then
// recovered from an error (e.g. failure to find the matching >).
- if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_FunctionLBrace,
- TT_ImplicitStringLiteral, TT_RegexLiteral,
+ if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
+ TT_FunctionLBrace, TT_ImplicitStringLiteral,
+ TT_InlineASMBrace, TT_RegexLiteral,
TT_TrailingReturnArrow))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
+ CurrentToken->MatchingParen = nullptr;
CurrentToken->FakeLParens.clear();
CurrentToken->FakeRParens = 0;
}
@@ -705,27 +769,22 @@ private:
Context(tok::TokenKind ContextKind, unsigned BindingStrength,
bool IsExpression)
: ContextKind(ContextKind), BindingStrength(BindingStrength),
- LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
- ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
- FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr),
- IsExpression(IsExpression), CanBeExpression(true),
- InTemplateArgument(false), InCtorInitializer(false),
- CaretFound(false), IsForEachMacro(false) {}
+ IsExpression(IsExpression) {}
tok::TokenKind ContextKind;
unsigned BindingStrength;
- unsigned LongestObjCSelectorName;
- bool ColonIsForRangeExpr;
- bool ColonIsDictLiteral;
- bool ColonIsObjCMethodExpr;
- FormatToken *FirstObjCSelectorName;
- FormatToken *FirstStartOfName;
bool IsExpression;
- bool CanBeExpression;
- bool InTemplateArgument;
- bool InCtorInitializer;
- bool CaretFound;
- bool IsForEachMacro;
+ unsigned LongestObjCSelectorName = 0;
+ bool ColonIsForRangeExpr = false;
+ bool ColonIsDictLiteral = false;
+ bool ColonIsObjCMethodExpr = false;
+ FormatToken *FirstObjCSelectorName = nullptr;
+ FormatToken *FirstStartOfName = nullptr;
+ bool CanBeExpression = true;
+ bool InTemplateArgument = false;
+ bool InCtorInitializer = false;
+ bool CaretFound = false;
+ bool IsForEachMacro = false;
};
/// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
@@ -746,23 +805,29 @@ private:
void modifyContext(const FormatToken &Current) {
if (Current.getPrecedence() == prec::Assignment &&
- !Line.First->isOneOf(tok::kw_template, tok::kw_using,
- TT_UnaryOperator) &&
+ !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
(!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
Contexts.back().IsExpression = true;
- for (FormatToken *Previous = Current.Previous;
- Previous && !Previous->isOneOf(tok::comma, tok::semi);
- Previous = Previous->Previous) {
- if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
- Previous = Previous->MatchingParen;
- if (!Previous)
+ if (!Line.First->is(TT_UnaryOperator)) {
+ for (FormatToken *Previous = Current.Previous;
+ Previous && !Previous->isOneOf(tok::comma, tok::semi);
+ Previous = Previous->Previous) {
+ if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
+ Previous = Previous->MatchingParen;
+ if (!Previous)
+ break;
+ }
+ if (Previous->opensScope())
break;
+ if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
+ Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ Previous->Previous && Previous->Previous->isNot(tok::equal))
+ Previous->Type = TT_PointerOrReference;
}
- if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
- Previous->isOneOf(tok::star, tok::amp) && Previous->Previous &&
- Previous->Previous->isNot(tok::equal))
- Previous->Type = TT_PointerOrReference;
}
+ } else if (Current.is(tok::lessless) &&
+ (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
+ Contexts.back().IsExpression = true;
} else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
Contexts.back().IsExpression = true;
} else if (Current.is(TT_TrailingReturnArrow)) {
@@ -833,30 +898,56 @@ private:
} else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
Current.Type = TT_UnaryOperator;
} else if (Current.is(tok::question)) {
- Current.Type = TT_ConditionalExpr;
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Line.MustBeDeclaration) {
+ // In JavaScript, `interface X { foo?(): bar; }` is an optional method
+ // on the interface, not a ternary expression.
+ Current.Type = TT_JsTypeOptionalQuestion;
+ } else {
+ Current.Type = TT_ConditionalExpr;
+ }
} else if (Current.isBinaryOperator() &&
(!Current.Previous || Current.Previous->isNot(tok::l_square))) {
Current.Type = TT_BinaryOperator;
} else if (Current.is(tok::comment)) {
- if (Current.TokenText.startswith("//"))
+ if (Current.TokenText.startswith("/*")) {
+ if (Current.TokenText.endswith("*/"))
+ Current.Type = TT_BlockComment;
+ else
+ // The lexer has for some reason determined a comment here. But we
+ // cannot really handle it, if it isn't properly terminated.
+ Current.Tok.setKind(tok::unknown);
+ } else {
Current.Type = TT_LineComment;
- else
- Current.Type = TT_BlockComment;
+ }
} else if (Current.is(tok::r_paren)) {
if (rParenEndsCast(Current))
Current.Type = TT_CastRParen;
+ if (Current.MatchingParen && Current.Next &&
+ !Current.Next->isBinaryOperator() &&
+ !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace))
+ if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
+ if (BeforeParen->is(tok::identifier) &&
+ BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
+ (!BeforeParen->Previous ||
+ BeforeParen->Previous->ClosesTemplateDeclaration))
+ Current.Type = TT_FunctionAnnotationRParen;
} else if (Current.is(tok::at) && Current.Next) {
- switch (Current.Next->Tok.getObjCKeywordID()) {
- case tok::objc_interface:
- case tok::objc_implementation:
- case tok::objc_protocol:
- Current.Type = TT_ObjCDecl;
- break;
- case tok::objc_property:
- Current.Type = TT_ObjCProperty;
- break;
- default:
- break;
+ if (Current.Next->isStringLiteral()) {
+ Current.Type = TT_ObjCStringLiteral;
+ } else {
+ switch (Current.Next->Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
+ }
}
} else if (Current.is(tok::period)) {
FormatToken *PreviousNoComment = Current.getPreviousNonComment();
@@ -875,7 +966,9 @@ private:
// Line.MightBeFunctionDecl can only be true after the parentheses of a
// function declaration have been found.
Current.Type = TT_TrailingAnnotation;
- } else if (Style.Language == FormatStyle::LK_Java && Current.Previous) {
+ } else if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Current.Previous) {
if (Current.Previous->is(tok::at) &&
Current.isNot(Keywords.kw_interface)) {
const FormatToken &AtToken = *Current.Previous;
@@ -902,7 +995,7 @@ private:
return false;
if (Tok.Previous->is(TT_LeadingJavaAnnotation))
- return false;
+ return false;
// Skip "const" as it does not have an influence on whether this is a name.
FormatToken *PreviousNotConst = Tok.Previous;
@@ -964,8 +1057,7 @@ private:
bool IsSizeOfOrAlignOf =
LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
- ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) ||
- (Tok.Next && Tok.Next->isBinaryOperator())))
+ (Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression))
IsCast = true;
else if (Tok.Next && Tok.Next->isNot(tok::string_literal) &&
(Tok.Next->Tok.isLiteral() ||
@@ -995,7 +1087,8 @@ private:
}
for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) {
- if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) {
+ if (!Prev ||
+ !Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) {
IsCast = false;
break;
}
@@ -1032,7 +1125,7 @@ private:
if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
return TT_PointerOrReference;
- if (NextToken->isOneOf(tok::kw_operator, tok::comma))
+ if (NextToken->isOneOf(tok::kw_operator, tok::comma, tok::semi))
return TT_PointerOrReference;
if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
@@ -1108,10 +1201,16 @@ private:
FormatToken *CurrentToken;
bool AutoFound;
const AdditionalKeywords &Keywords;
+
+ // Set of "<" tokens that do not open a template parameter list. If parseAngle
+ // determines that a specific token can't be a template opener, it will make
+ // the same decision irrespective of the decisions for tokens leading up to it.
+ // Store this information to prevent this from causing exponential runtime.
+ llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
};
-static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
-static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
+static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
+static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
/// \brief Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
@@ -1361,12 +1460,13 @@ static bool isFunctionDeclarationName(const FormatToken &Current) {
assert(Next->is(tok::l_paren));
if (Next->Next == Next->MatchingParen)
return true;
- for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen;
+ for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
Tok->isOneOf(TT_PointerOrReference, TT_StartOfName))
return true;
- if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
+ if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
+ Tok->Tok.isLiteral())
return false;
}
return false;
@@ -1502,7 +1602,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(tok::comma) && Left.NestingLevel == 0)
return 3;
} else if (Style.Language == FormatStyle::LK_JavaScript) {
- if (Right.is(Keywords.kw_function))
+ if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
return 100;
}
@@ -1512,6 +1612,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Right.is(tok::l_square)) {
if (Style.Language == FormatStyle::LK_Proto)
return 1;
+ // Slightly prefer formatting local lambda definitions like functions.
+ if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
+ return 50;
if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare))
return 500;
}
@@ -1521,11 +1624,15 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
return 3;
if (Left.is(TT_StartOfName))
- return 20;
+ return 110;
if (InFunctionDecl && Right.NestingLevel == 0)
return Style.PenaltyReturnTypeOnItsOwnLine;
return 200;
}
+ if (Right.is(TT_PointerOrReference))
+ return 190;
+ if (Right.is(TT_TrailingReturnArrow))
+ return 110;
if (Left.is(tok::equal) && Right.is(tok::l_brace))
return 150;
if (Left.is(TT_CastRParen))
@@ -1575,6 +1682,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket)
return 100;
+ if (Left.is(tok::l_paren) && Left.Previous && Left.Previous->is(tok::kw_if))
+ return 1000;
if (Left.is(tok::equal) && InFunctionDecl)
return 110;
if (Right.is(tok::r_brace))
@@ -1591,7 +1700,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
return 50;
if (Right.is(tok::lessless)) {
- if (Left.is(tok::string_literal)) {
+ if (Left.is(tok::string_literal) &&
+ (!Right.LastOperator || Right.OperatorIndex != 1)) {
StringRef Content = Left.TokenText;
if (Content.startswith("\""))
Content = Content.drop_front(1);
@@ -1607,7 +1717,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Left.is(TT_ConditionalExpr))
return prec::Conditional;
prec::Level Level = Left.getPrecedence();
-
+ if (Level != prec::Unknown)
+ return Level;
+ Level = Right.getPrecedence();
if (Level != prec::Unknown)
return Level;
@@ -1636,7 +1748,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Right.isOneOf(tok::semi, tok::comma))
return false;
if (Right.is(tok::less) &&
- (Left.isOneOf(tok::kw_template, tok::r_paren) ||
+ (Left.is(tok::kw_template) ||
(Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
return true;
if (Left.isOneOf(tok::exclaim, tok::tilde))
@@ -1655,17 +1767,27 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::l_square) && Right.is(tok::amp))
return false;
if (Right.is(TT_PointerOrReference))
- return Left.Tok.isLiteral() ||
- (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
- Style.PointerAlignment != FormatStyle::PAS_Left);
+ return !(Left.is(tok::r_paren) && Left.MatchingParen &&
+ (Left.MatchingParen->is(TT_OverloadedOperatorLParen) ||
+ (Left.MatchingParen->Previous &&
+ Left.MatchingParen->Previous->is(
+ TT_FunctionDeclarationName)))) &&
+ (Left.Tok.isLiteral() ||
+ (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
+ (Style.PointerAlignment != FormatStyle::PAS_Left ||
+ Line.IsMultiVariableDeclStmt)));
if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
(!Left.is(TT_PointerOrReference) ||
- Style.PointerAlignment != FormatStyle::PAS_Right))
+ (Style.PointerAlignment != FormatStyle::PAS_Right &&
+ !Line.IsMultiVariableDeclStmt)))
return true;
if (Left.is(TT_PointerOrReference))
return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
- (!Right.isOneOf(TT_PointerOrReference, tok::l_paren) &&
- Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous &&
+ (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
+ tok::l_paren) &&
+ (Style.PointerAlignment != FormatStyle::PAS_Right &&
+ !Line.IsMultiVariableDeclStmt) &&
+ Left.Previous &&
!Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
if (Right.is(tok::star) && Left.is(tok::l_paren))
return false;
@@ -1700,13 +1822,12 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
(Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
(Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
- tok::kw_switch, tok::kw_case) ||
- (Left.isOneOf(tok::kw_try, tok::kw_catch, tok::kw_new,
- tok::kw_delete) &&
- (!Left.Previous || Left.Previous->isNot(tok::period))) ||
- Left.IsForEachMacro)) ||
+ tok::kw_switch, tok::kw_case, TT_ForEachMacro) ||
+ (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
+ tok::kw_new, tok::kw_delete) &&
+ (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
(Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
- (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) &&
+ (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || Left.is(tok::r_paren)) &&
Line.Type != LT_PreprocessorDirective);
}
if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
@@ -1748,6 +1869,20 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(Keywords.kw_var))
return true;
+ if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
+ return false;
+ if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
+ Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
+ return false;
+ if (Left.is(tok::ellipsis))
+ return false;
+ if (Left.is(TT_TemplateCloser) &&
+ !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
+ Keywords.kw_implements, Keywords.kw_extends))
+ // Type assertions ('<type>expr') are not followed by whitespace. Other
+ // locations that should have whitespace following are identified by the
+ // above set of follower tokens.
+ return false;
} else if (Style.Language == FormatStyle::LK_Java) {
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
return true;
@@ -1789,16 +1924,29 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
return Right.is(tok::coloncolon);
if (Right.is(TT_OverloadedOperatorLParen))
return false;
- if (Right.is(tok::colon))
- return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
- Right.getNextNonComment() && Right.isNot(TT_ObjCMethodExpr) &&
- !Left.is(tok::question) &&
- !(Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) &&
- (Right.isNot(TT_DictLiteral) || Style.SpacesInContainerLiterals);
+ if (Right.is(tok::colon)) {
+ if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
+ !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
+ return false;
+ if (Right.is(TT_ObjCMethodExpr))
+ return false;
+ if (Left.is(tok::question))
+ return false;
+ if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
+ return false;
+ if (Right.is(TT_DictLiteral))
+ return Style.SpacesInContainerLiterals;
+ return true;
+ }
if (Left.is(TT_UnaryOperator))
return Right.is(TT_BinaryOperator);
+
+ // If the next token is a binary operator or a selector name, we have
+ // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
if (Left.is(TT_CastRParen))
- return Style.SpaceAfterCStyleCast || Right.is(TT_BinaryOperator);
+ return Style.SpaceAfterCStyleCast ||
+ Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
+
if (Left.is(tok::greater) && Right.is(tok::greater)) {
return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
(Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
@@ -1819,7 +1967,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr))
return true;
- if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren))
+ if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
+ Right.isNot(TT_FunctionTypeLParen))
return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
@@ -1850,9 +1999,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
// intention is to insert a line break after it in order to make shuffling
// around entries easier.
const FormatToken *BeforeClosingBrace = nullptr;
- if (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Left.MatchingParen)
+ if (Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
+ Left.BlockKind != BK_Block && Left.MatchingParen)
BeforeClosingBrace = Left.MatchingParen->Previous;
- else if (Right.is(tok::r_brace) && Right.BlockKind != BK_Block)
+ else if (Right.MatchingParen &&
+ Right.MatchingParen->isOneOf(tok::l_brace,
+ TT_ArrayInitializerLSquare))
BeforeClosingBrace = &Left;
if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
BeforeClosingBrace->isTrailingComment()))
@@ -1862,8 +2014,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return Left.BlockKind != BK_BracedInit &&
Left.isNot(TT_CtorInitializerColon) &&
(Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
- if (Right.Previous->isTrailingComment() ||
- (Right.isStringLiteral() && Right.Previous->isStringLiteral()))
+ if (Left.isTrailingComment())
+ return true;
+ if (Left.isStringLiteral() &&
+ (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
return true;
if (Right.Previous->IsUnterminatedLiteral)
return true;
@@ -1889,6 +2043,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
Style.Language == FormatStyle::LK_Proto)
// Don't put enums onto single lines in protocol buffers.
return true;
+ if (Right.is(TT_InlineASMBrace))
+ return Right.HasUnescapedNewline;
if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) &&
Left.is(tok::l_brace) && !Left.Children.empty())
// Support AllowShortFunctionsOnASingleLine for JavaScript.
@@ -1903,8 +2059,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return true;
if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
return true;
- if (Right.is(tok::lessless) && Left.is(tok::identifier) &&
- Left.TokenText == "endl")
+
+ if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Left.is(TT_LeadingJavaAnnotation) &&
+ Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
+ Line.Last->is(tok::l_brace))
return true;
if (Style.Language == FormatStyle::LK_JavaScript) {
@@ -1913,13 +2073,15 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
Left.Previous->is(tok::char_constant))
return true;
if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) &&
- Left.NestingLevel == 0)
+ Left.NestingLevel == 0 && Left.Previous &&
+ Left.Previous->is(tok::equal) &&
+ Line.First->isOneOf(tok::identifier, Keywords.kw_import,
+ tok::kw_export) &&
+ // kw_var is a pseudo-token that's a tok::identifier, so matches above.
+ !Line.First->is(Keywords.kw_var))
+ // Enum style object literal.
return true;
} else if (Style.Language == FormatStyle::LK_Java) {
- if (Left.is(TT_LeadingJavaAnnotation) &&
- Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
- Line.Last->is(tok::l_brace))
- return true;
if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
Right.Next->is(tok::string_literal))
return true;
@@ -1947,9 +2109,15 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return false;
if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
return !Right.is(tok::l_paren);
+ if (Right.is(TT_PointerOrReference))
+ return Line.IsMultiVariableDeclStmt ||
+ (Style.PointerAlignment == FormatStyle::PAS_Right &&
+ (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
Right.is(tok::kw_operator))
return true;
+ if (Left.is(TT_PointerOrReference))
+ return false;
if (Right.isTrailingComment())
// We rely on MustBreakBefore being set correctly here as we should not
// change the "binding" behavior of a comment.
@@ -1970,8 +2138,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return false;
if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
return true;
- if (Right.is(TT_SelectorName))
- return true;
+ if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
+ Right.Next->is(TT_ObjCMethodExpr)))
+ return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
return true;
if (Left.ClosesTemplateDeclaration)
@@ -1983,17 +2152,16 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return true;
if (Right.is(TT_RangeBasedForLoopColon))
return false;
- if (Left.isOneOf(TT_PointerOrReference, TT_TemplateCloser,
- TT_UnaryOperator) ||
+ if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
Left.is(tok::kw_operator))
return false;
- if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
+ if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
+ Line.Type == LT_VirtualFunctionDecl)
return false;
if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
return false;
if (Left.is(tok::l_paren) && Left.Previous &&
- (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen) ||
- Left.Previous->is(tok::kw_if)))
+ (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
return false;
if (Right.is(TT_ImplicitStringLiteral))
return false;
@@ -2027,8 +2195,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
if (Right.is(TT_CtorInitializerComma) &&
Style.BreakConstructorInitializersBeforeComma)
return true;
- if (Left.is(tok::greater) && Right.is(tok::greater) &&
- Left.isNot(TT_TemplateCloser))
+ if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
+ (Left.is(tok::less) && Right.is(tok::less)))
return false;
if (Right.is(TT_BinaryOperator) &&
Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
@@ -2046,8 +2214,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return true;
return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
tok::kw_class, tok::kw_struct) ||
- Right.isMemberAccess() || Right.is(TT_TrailingReturnArrow) ||
- Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) ||
+ Right.isMemberAccess() ||
+ Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
+ tok::colon, tok::l_square, tok::at) ||
(Left.is(tok::r_paren) &&
Right.isOneOf(tok::identifier, tok::kw_const)) ||
(Left.is(tok::l_paren) && !Right.is(tok::r_paren));
diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index ff8e32a56afc1..a948cdb1c4194 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h
@@ -42,8 +42,8 @@ public:
: First(Line.Tokens.front().Tok), Level(Line.Level),
InPPDirective(Line.InPPDirective),
MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
- Affected(false), LeadingEmptyLinesAffected(false),
- ChildrenAffected(false) {
+ IsMultiVariableDeclStmt(false), Affected(false),
+ LeadingEmptyLinesAffected(false), ChildrenAffected(false) {
assert(!Line.Tokens.empty());
// Calculate Next and Previous for all tokens. Note that we must overwrite
@@ -59,11 +59,8 @@ public:
I->Tok->Previous = Current;
Current = Current->Next;
Current->Children.clear();
- for (SmallVectorImpl<UnwrappedLine>::const_iterator
- I = Node.Children.begin(),
- E = Node.Children.end();
- I != E; ++I) {
- Children.push_back(new AnnotatedLine(*I));
+ for (const auto& Child : Node.Children) {
+ Children.push_back(new AnnotatedLine(Child));
Current->Children.push_back(Children.back());
}
}
@@ -75,6 +72,12 @@ public:
for (unsigned i = 0, e = Children.size(); i != e; ++i) {
delete Children[i];
}
+ FormatToken *Current = First;
+ while (Current) {
+ Current->Children.clear();
+ Current->Role.reset();
+ Current = Current->Next;
+ }
}
FormatToken *First;
@@ -87,6 +90,7 @@ public:
bool InPPDirective;
bool MustBeDeclaration;
bool MightBeFunctionDecl;
+ bool IsMultiVariableDeclStmt;
/// \c True if this line should be formatted, i.e. intersects directly or
/// indirectly with one of the input ranges.
@@ -101,8 +105,8 @@ public:
private:
// Disallow copying.
- AnnotatedLine(const AnnotatedLine &) LLVM_DELETED_FUNCTION;
- void operator=(const AnnotatedLine &) LLVM_DELETED_FUNCTION;
+ AnnotatedLine(const AnnotatedLine &) = delete;
+ void operator=(const AnnotatedLine &) = delete;
};
/// \brief Determines extra information about the tokens comprising an
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index ca66e7351641c..cbf8c6c922118 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -25,19 +25,152 @@ bool startsExternCBlock(const AnnotatedLine &Line) {
NextNext && NextNext->is(tok::l_brace);
}
+/// \brief Tracks the indent level of \c AnnotatedLines across levels.
+///
+/// \c nextLine must be called for each \c AnnotatedLine, after which \c
+/// getIndent() will return the indent for the last line \c nextLine was called
+/// with.
+/// If the line is not formatted (and thus the indent does not change), calling
+/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
+/// subsequent lines on the same level to be indented at the same level as the
+/// given line.
+class LevelIndentTracker {
+public:
+ LevelIndentTracker(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords, unsigned StartLevel,
+ int AdditionalIndent)
+ : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
+ for (unsigned i = 0; i != StartLevel; ++i)
+ IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+ }
+
+ /// \brief Returns the indent for the current line.
+ unsigned getIndent() const { return Indent; }
+
+ /// \brief Update the indent state given that \p Line is going to be formatted
+ /// next.
+ void nextLine(const AnnotatedLine &Line) {
+ Offset = getIndentOffset(*Line.First);
+ if (Line.InPPDirective) {
+ Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
+ } else {
+ while (IndentForLevel.size() <= Line.Level)
+ IndentForLevel.push_back(-1);
+ IndentForLevel.resize(Line.Level + 1);
+ Indent = getIndent(IndentForLevel, Line.Level);
+ }
+ if (static_cast<int>(Indent) + Offset >= 0)
+ Indent += Offset;
+ }
+
+ /// \brief Update the level indent to adapt to the given \p Line.
+ ///
+ /// When a line is not formatted, we move the subsequent lines on the same
+ /// level to the same indent.
+ /// Note that \c nextLine must have been called before this method.
+ void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
+ unsigned LevelIndent = Line.First->OriginalColumn;
+ if (static_cast<int>(LevelIndent) - Offset >= 0)
+ LevelIndent -= Offset;
+ if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) &&
+ !Line.InPPDirective)
+ IndentForLevel[Line.Level] = LevelIndent;
+ }
+
+private:
+ /// \brief Get the offset of the line relative to the level.
+ ///
+ /// For example, 'public:' labels in classes are offset by 1 or 2
+ /// characters to the left from their level.
+ int getIndentOffset(const FormatToken &RootToken) {
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript)
+ return 0;
+ if (RootToken.isAccessSpecifier(false) ||
+ RootToken.isObjCAccessSpecifier() ||
+ (RootToken.is(Keywords.kw_signals) && RootToken.Next &&
+ RootToken.Next->is(tok::colon)))
+ return Style.AccessModifierOffset;
+ return 0;
+ }
+
+ /// \brief Get the indent of \p Level from \p IndentForLevel.
+ ///
+ /// \p IndentForLevel must contain the indent for the level \c l
+ /// at \p IndentForLevel[l], or a value < 0 if the indent for
+ /// that level is unknown.
+ unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
+ if (IndentForLevel[Level] != -1)
+ return IndentForLevel[Level];
+ if (Level == 0)
+ return 0;
+ return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
+ }
+
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ const unsigned AdditionalIndent;
+
+ /// \brief The indent in characters for each level.
+ std::vector<int> IndentForLevel;
+
+ /// \brief Offset of the current line relative to the indent level.
+ ///
+ /// For example, the 'public' keyword is often indented with a negative
+ /// offset.
+ int Offset = 0;
+
+ /// \brief The current line's indent.
+ unsigned Indent = 0;
+};
+
class LineJoiner {
public:
- LineJoiner(const FormatStyle &Style) : Style(Style) {}
+ LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
+ const SmallVectorImpl<AnnotatedLine *> &Lines)
+ : Style(Style), Keywords(Keywords), End(Lines.end()),
+ Next(Lines.begin()) {}
+
+ /// \brief Returns the next line, merging multiple lines into one if possible.
+ const AnnotatedLine *getNextMergedLine(bool DryRun,
+ LevelIndentTracker &IndentTracker) {
+ if (Next == End)
+ return nullptr;
+ const AnnotatedLine *Current = *Next;
+ IndentTracker.nextLine(*Current);
+ unsigned MergedLines =
+ tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
+ if (MergedLines > 0 && Style.ColumnLimit == 0)
+ // Disallow line merging if there is a break at the start of one of the
+ // input lines.
+ for (unsigned i = 0; i < MergedLines; ++i)
+ if (Next[i + 1]->First->NewlinesBefore > 0)
+ MergedLines = 0;
+ if (!DryRun)
+ for (unsigned i = 0; i < MergedLines; ++i)
+ join(*Next[i], *Next[i + 1]);
+ Next = Next + MergedLines + 1;
+ return Current;
+ }
+private:
/// \brief Calculates how many lines can be merged into 1 starting at \p I.
unsigned
tryFitMultipleLinesInOne(unsigned Indent,
SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+ // Can't join the last line with anything.
+ if (I + 1 == E)
+ return 0;
// We can never merge stuff if there are trailing line comments.
const AnnotatedLine *TheLine = *I;
if (TheLine->Last->is(TT_LineComment))
return 0;
+ if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
+ return 0;
+ if (TheLine->InPPDirective &&
+ (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
+ return 0;
if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
return 0;
@@ -50,9 +183,6 @@ public:
? 0
: Limit - TheLine->Last->TotalLength;
- if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
- return 0;
-
// FIXME: TheLine->Level != 0 might or might not be the right check to do.
// If necessary, change to something smarter.
bool MergeShortFunctions =
@@ -113,15 +243,12 @@ public:
return 0;
}
-private:
unsigned
tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
SmallVectorImpl<AnnotatedLine *>::const_iterator E,
unsigned Limit) {
if (Limit == 0)
return 0;
- if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
- return 0;
if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
return 0;
if (1 + I[1]->Last->TotalLength > Limit)
@@ -147,8 +274,8 @@ private:
return 0;
if (1 + I[1]->Last->TotalLength > Limit)
return 0;
- if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
- tok::kw_while, TT_LineComment))
+ if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
+ TT_LineComment))
return 0;
// Only inline simple if's (no nested if or else).
if (I + 2 != E && Line.First->is(tok::kw_if) &&
@@ -157,9 +284,10 @@ private:
return 1;
}
- unsigned tryMergeShortCaseLabels(
- SmallVectorImpl<AnnotatedLine *>::const_iterator I,
- SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
+ unsigned
+ tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
if (Limit == 0 || I + 1 == E ||
I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
return 0;
@@ -191,16 +319,21 @@ private:
AnnotatedLine &Line = **I;
// Don't merge ObjC @ keywords and methods.
+ // FIXME: If an option to allow short exception handling clauses on a single
+ // line is added, change this to not return for @try and friends.
if (Style.Language != FormatStyle::LK_Java &&
Line.First->isOneOf(tok::at, tok::minus, tok::plus))
return 0;
// Check that the current line allows merging. This depends on whether we
// are in a control flow statements as well as several style flags.
- if (Line.First->isOneOf(tok::kw_else, tok::kw_case))
+ if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
+ (Line.First->Next && Line.First->Next->is(tok::kw_else)))
return 0;
if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
- tok::kw_catch, tok::kw_for, tok::r_brace)) {
+ tok::kw___try, tok::kw_catch, tok::kw___finally,
+ tok::kw_for, tok::r_brace) ||
+ Line.First->is(Keywords.kw___except)) {
if (!Style.AllowShortBlocksOnASingleLine)
return 0;
if (!Style.AllowShortIfStatementsOnASingleLine &&
@@ -211,7 +344,11 @@ private:
return 0;
// FIXME: Consider an option to allow short exception handling clauses on
// a single line.
- if (Line.First->isOneOf(tok::kw_try, tok::kw_catch))
+ // FIXME: This isn't covered by tests.
+ // FIXME: For catch, __except, __finally the first token on the line
+ // is '}', so this isn't correct here.
+ if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
+ Keywords.kw___except, tok::kw___finally))
return 0;
}
@@ -226,7 +363,8 @@ private:
} else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) &&
!startsExternCBlock(Line)) {
// We don't merge short records.
- if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct))
+ if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
+ Keywords.kw_interface))
return 0;
// Check that we still have three lines and they fit into the limit.
@@ -252,6 +390,10 @@ private:
if (Tok->isNot(tok::r_brace))
return 0;
+ // Don't merge "if (a) { .. } else {".
+ if (Tok->Next && Tok->Next->is(tok::kw_else))
+ return 0;
+
return 2;
}
return 0;
@@ -285,28 +427,367 @@ private:
return false;
}
+ void join(AnnotatedLine &A, const AnnotatedLine &B) {
+ assert(!A.Last->Next);
+ assert(!B.First->Previous);
+ if (B.Affected)
+ A.Affected = true;
+ A.Last->Next = B.First;
+ B.First->Previous = A.Last;
+ B.First->CanBreakBefore = true;
+ unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
+ for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
+ Tok->TotalLength += LengthA;
+ A.Last = Tok;
+ }
+ }
+
const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ const SmallVectorImpl<AnnotatedLine*>::const_iterator End;
+
+ SmallVectorImpl<AnnotatedLine*>::const_iterator Next;
};
-class NoColumnLimitFormatter {
+static void markFinalized(FormatToken *Tok) {
+ for (; Tok; Tok = Tok->Next) {
+ Tok->Finalized = true;
+ for (AnnotatedLine *Child : Tok->Children)
+ markFinalized(Child->First);
+ }
+}
+
+#ifndef NDEBUG
+static void printLineState(const LineState &State) {
+ llvm::dbgs() << "State: ";
+ for (const ParenState &P : State.Stack) {
+ llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
+ << " ";
+ }
+ llvm::dbgs() << State.NextToken->TokenText << "\n";
+}
+#endif
+
+/// \brief Base class for classes that format one \c AnnotatedLine.
+class LineFormatter {
public:
- NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
+ LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
+ BlockFormatter(BlockFormatter) {}
+ virtual ~LineFormatter() {}
+
+ /// \brief Formats an \c AnnotatedLine and returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) = 0;
+
+protected:
+ /// \brief If the \p State's next token is an r_brace closing a nested block,
+ /// format the nested block before it.
+ ///
+ /// Returns \c true if all children could be placed successfully and adapts
+ /// \p Penalty as well as \p State. If \p DryRun is false, also directly
+ /// creates changes using \c Whitespaces.
+ ///
+ /// The crucial idea here is that children always get formatted upon
+ /// encountering the closing brace right after the nested block. Now, if we
+ /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
+ /// \c false), the entire block has to be kept on the same line (which is only
+ /// possible if it fits on the line, only contains a single statement, etc.).
+ ///
+ /// If \p NewLine is true, we format the nested block on separate lines, i.e.
+ /// break after the "{", format all lines with correct indentation and then put
+ /// the closing "}" on yet another new line.
+ ///
+ /// This enables us to keep the simple structure of the
+ /// \c UnwrappedLineFormatter, where we only have two options for each token:
+ /// break or don't break.
+ bool formatChildren(LineState &State, bool NewLine, bool DryRun,
+ unsigned &Penalty) {
+ const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+ FormatToken &Previous = *State.NextToken->Previous;
+ if (!LBrace || LBrace->isNot(tok::l_brace) ||
+ LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
+ // The previous token does not open a block. Nothing to do. We don't
+ // assert so that we can simply call this function for all tokens.
+ return true;
+
+ if (NewLine) {
+ int AdditionalIndent = State.Stack.back().Indent -
+ Previous.Children[0]->Level * Style.IndentWidth;
+
+ Penalty +=
+ BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
+ /*FixBadIndentation=*/true);
+ return true;
+ }
+
+ if (Previous.Children[0]->First->MustBreakBefore)
+ return false;
+
+ // Cannot merge multiple statements into a single line.
+ if (Previous.Children.size() > 1)
+ return false;
+
+ // Cannot merge into one line if this line ends on a comment.
+ if (Previous.is(tok::comment))
+ return false;
+
+ // We can't put the closing "}" on a line with a trailing comment.
+ if (Previous.Children[0]->Last->isTrailingComment())
+ return false;
+
+ // If the child line exceeds the column limit, we wouldn't want to merge it.
+ // We add +2 for the trailing " }".
+ if (Style.ColumnLimit > 0 &&
+ Previous.Children[0]->Last->TotalLength + State.Column + 2 >
+ Style.ColumnLimit)
+ return false;
+
+ if (!DryRun) {
+ Whitespaces->replaceWhitespace(
+ *Previous.Children[0]->First,
+ /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
+ /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
+ }
+ Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
+
+ State.Column += 1 + Previous.Children[0]->Last->TotalLength;
+ return true;
+ }
+
+ ContinuationIndenter *Indenter;
+
+private:
+ WhitespaceManager *Whitespaces;
+ const FormatStyle &Style;
+ UnwrappedLineFormatter *BlockFormatter;
+};
- /// \brief Formats the line starting at \p State, simply keeping all of the
- /// input's line breaking decisions.
- void format(unsigned FirstIndent, const AnnotatedLine *Line) {
+/// \brief Formatter that keeps the existing line breaks.
+class NoColumnLimitLineFormatter : public LineFormatter {
+public:
+ NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Formats the line, simply keeping all of the input's line breaking
+ /// decisions.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) override {
+ assert(!DryRun);
LineState State =
- Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
+ Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
while (State.NextToken) {
bool Newline =
Indenter->mustBreak(State) ||
(Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
+ unsigned Penalty = 0;
+ formatChildren(State, Newline, /*DryRun=*/false, Penalty);
Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
}
+ return 0;
+ }
+};
+
+/// \brief Formatter that puts all tokens into a single line without breaks.
+class NoLineBreakFormatter : public LineFormatter {
+public:
+ NoLineBreakFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces, const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Puts all tokens into a single line.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) {
+ unsigned Penalty = 0;
+ LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+ while (State.NextToken) {
+ formatChildren(State, /*Newline=*/false, DryRun, Penalty);
+ Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+ }
+ return Penalty;
+ }
+};
+
+/// \brief Finds the best way to break lines.
+class OptimizingLineFormatter : public LineFormatter {
+public:
+ OptimizingLineFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Formats the line by finding the best line breaks with line lengths
+ /// below the column limit.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) {
+ LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+
+ // If the ObjC method declaration does not fit on a line, we should format
+ // it with one arg per line.
+ if (State.Line->Type == LT_ObjCMethodDecl)
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // Find best solution in solution space.
+ return analyzeSolutionSpace(State, DryRun);
}
private:
- ContinuationIndenter *Indenter;
+ struct CompareLineStatePointers {
+ bool operator()(LineState *obj1, LineState *obj2) const {
+ return *obj1 < *obj2;
+ }
+ };
+
+ /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+ ///
+ /// In case of equal penalties, we want to prefer states that were inserted
+ /// first. During state generation we make sure that we insert states first
+ /// that break the line as late as possible.
+ typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+ /// \brief An edge in the solution space from \c Previous->State to \c State,
+ /// inserting a newline dependent on the \c NewLine.
+ struct StateNode {
+ StateNode(const LineState &State, bool NewLine, StateNode *Previous)
+ : State(State), NewLine(NewLine), Previous(Previous) {}
+ LineState State;
+ bool NewLine;
+ StateNode *Previous;
+ };
+
+ /// \brief An item in the prioritized BFS search queue. The \c StateNode's
+ /// \c State has the given \c OrderedPenalty.
+ typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
+
+ /// \brief The BFS queue type.
+ typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+ std::greater<QueueItem>> QueueType;
+
+ /// \brief Analyze the entire solution space starting from \p InitialState.
+ ///
+ /// This implements a variant of Dijkstra's algorithm on the graph that spans
+ /// the solution space (\c LineStates are the nodes). The algorithm tries to
+ /// find the shortest path (the one with lowest penalty) from \p InitialState
+ /// to a state where all tokens are placed. Returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
+ std::set<LineState *, CompareLineStatePointers> Seen;
+
+ // Increasing count of \c StateNode items we have created. This is used to
+ // create a deterministic order independent of the container.
+ unsigned Count = 0;
+ QueueType Queue;
+
+ // Insert start element into queue.
+ StateNode *Node =
+ new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
+ Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
+ ++Count;
+
+ unsigned Penalty = 0;
+
+ // While not empty, take first element and follow edges.
+ while (!Queue.empty()) {
+ Penalty = Queue.top().first.first;
+ StateNode *Node = Queue.top().second;
+ if (!Node->State.NextToken) {
+ DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
+ break;
+ }
+ Queue.pop();
+
+ // Cut off the analysis of certain solutions if the analysis gets too
+ // complex. See description of IgnoreStackForComparison.
+ if (Count > 10000)
+ Node->State.IgnoreStackForComparison = true;
+
+ if (!Seen.insert(&Node->State).second)
+ // State already examined with lower penalty.
+ continue;
+
+ FormatDecision LastFormat = Node->State.NextToken->Decision;
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
+ }
+
+ if (Queue.empty()) {
+ // We were unable to find a solution, do nothing.
+ // FIXME: Add diagnostic?
+ DEBUG(llvm::dbgs() << "Could not find a solution.\n");
+ return 0;
+ }
+
+ // Reconstruct the solution.
+ if (!DryRun)
+ reconstructPath(InitialState, Queue.top().second);
+
+ DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
+ DEBUG(llvm::dbgs() << "---\n");
+
+ return Penalty;
+ }
+
+ /// \brief Add the following state to the analysis queue \c Queue.
+ ///
+ /// Assume the current state is \p PreviousNode and has been reached with a
+ /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
+ void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
+ bool NewLine, unsigned *Count, QueueType *Queue) {
+ if (NewLine && !Indenter->canBreak(PreviousNode->State))
+ return;
+ if (!NewLine && Indenter->mustBreak(PreviousNode->State))
+ return;
+
+ StateNode *Node = new (Allocator.Allocate())
+ StateNode(PreviousNode->State, NewLine, PreviousNode);
+ if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
+ return;
+
+ Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
+
+ Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
+ ++(*Count);
+ }
+
+ /// \brief Applies the best formatting by reconstructing the path in the
+ /// solution space that leads to \c Best.
+ void reconstructPath(LineState &State, StateNode *Best) {
+ std::deque<StateNode *> Path;
+ // We do not need a break before the initial token.
+ while (Best->Previous) {
+ Path.push_front(Best);
+ Best = Best->Previous;
+ }
+ for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
+ I != E; ++I) {
+ unsigned Penalty = 0;
+ formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
+ Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
+
+ DEBUG({
+ printLineState((*I)->Previous->State);
+ if ((*I)->NewLine) {
+ llvm::dbgs() << "Penalty for placing "
+ << (*I)->Previous->State.NextToken->Tok.getName() << ": "
+ << Penalty << "\n";
+ }
+ });
+ }
+ }
+
+ llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
};
} // namespace
@@ -315,7 +796,7 @@ unsigned
UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
bool DryRun, int AdditionalIndent,
bool FixBadIndentation) {
- LineJoiner Joiner(Style);
+ LineJoiner Joiner(Style, Keywords, Lines);
// Try to look up already computed penalty in DryRun-mode.
std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
@@ -326,151 +807,93 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
assert(!Lines.empty());
unsigned Penalty = 0;
- std::vector<int> IndentForLevel;
- for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
- IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+ LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
+ AdditionalIndent);
const AnnotatedLine *PreviousLine = nullptr;
- for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
- E = Lines.end();
- I != E; ++I) {
- const AnnotatedLine &TheLine = **I;
- const FormatToken *FirstTok = TheLine.First;
- int Offset = getIndentOffset(*FirstTok);
-
- // Determine indent and try to merge multiple unwrapped lines.
- unsigned Indent;
- if (TheLine.InPPDirective) {
- Indent = TheLine.Level * Style.IndentWidth;
- } else {
- while (IndentForLevel.size() <= TheLine.Level)
- IndentForLevel.push_back(-1);
- IndentForLevel.resize(TheLine.Level + 1);
- Indent = getIndent(IndentForLevel, TheLine.Level);
- }
- unsigned LevelIndent = Indent;
- if (static_cast<int>(Indent) + Offset >= 0)
- Indent += Offset;
-
- // Merge multiple lines if possible.
- unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
- if (MergedLines > 0 && Style.ColumnLimit == 0) {
- // Disallow line merging if there is a break at the start of one of the
- // input lines.
- for (unsigned i = 0; i < MergedLines; ++i) {
- if (I[i + 1]->First->NewlinesBefore > 0)
- MergedLines = 0;
- }
- }
- if (!DryRun) {
- for (unsigned i = 0; i < MergedLines; ++i) {
- join(*I[i], *I[i + 1]);
- }
- }
- I += MergedLines;
-
+ const AnnotatedLine *NextLine = nullptr;
+ for (const AnnotatedLine *Line =
+ Joiner.getNextMergedLine(DryRun, IndentTracker);
+ Line; Line = NextLine) {
+ const AnnotatedLine &TheLine = *Line;
+ unsigned Indent = IndentTracker.getIndent();
bool FixIndentation =
- FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
- if (TheLine.First->is(tok::eof)) {
- if (PreviousLine && PreviousLine->Affected && !DryRun) {
- // Remove the file's trailing whitespace.
- unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
- Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
- /*IndentLevel=*/0, /*Spaces=*/0,
- /*TargetColumn=*/0);
- }
- } else if (TheLine.Type != LT_Invalid &&
- (TheLine.Affected || FixIndentation)) {
- if (FirstTok->WhitespaceRange.isValid()) {
- if (!DryRun)
- formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
+ FixBadIndentation && (Indent != TheLine.First->OriginalColumn);
+ bool ShouldFormat = TheLine.Affected || FixIndentation;
+ // We cannot format this line; if the reason is that the line had a
+ // parsing error, remember that.
+ if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
+ *IncompleteFormat = true;
+
+ if (ShouldFormat && TheLine.Type != LT_Invalid) {
+ if (!DryRun)
+ formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
+ TheLine.InPPDirective);
+
+ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
+ unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
+ bool FitsIntoOneLine =
+ TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+ TheLine.Type == LT_ImportStatement;
+
+ if (Style.ColumnLimit == 0)
+ NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ else if (FitsIntoOneLine)
+ Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ else
+ Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ } else {
+ // If no token in the current line is affected, we still need to format
+ // affected children.
+ if (TheLine.ChildrenAffected)
+ format(TheLine.Children, DryRun);
+
+ // Adapt following lines on the current indent level to the same level
+ // unless the current \c AnnotatedLine is not at the beginning of a line.
+ bool StartsNewLine =
+ TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
+ if (StartsNewLine)
+ IndentTracker.adjustToUnmodifiedLine(TheLine);
+ if (!DryRun) {
+ bool ReformatLeadingWhitespace =
+ StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
+ TheLine.LeadingEmptyLinesAffected);
+ // Format the first token.
+ if (ReformatLeadingWhitespace)
+ formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
+ TheLine.First->OriginalColumn,
TheLine.InPPDirective);
- } else {
- Indent = LevelIndent = FirstTok->OriginalColumn;
- }
-
- // If everything fits on a single line, just put it there.
- unsigned ColumnLimit = Style.ColumnLimit;
- if (I + 1 != E) {
- AnnotatedLine *NextLine = I[1];
- if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
- ColumnLimit = getColumnLimit(TheLine.InPPDirective);
- }
+ else
+ Whitespaces->addUntouchableToken(*TheLine.First,
+ TheLine.InPPDirective);
- if (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
- TheLine.Type == LT_ImportStatement) {
- LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
- while (State.NextToken) {
- formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty);
- Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
- }
- } else if (Style.ColumnLimit == 0) {
- // FIXME: Implement nested blocks for ColumnLimit = 0.
- NoColumnLimitFormatter Formatter(Indenter);
- if (!DryRun)
- Formatter.format(Indent, &TheLine);
- } else {
- Penalty += format(TheLine, Indent, DryRun);
- }
-
- if (!TheLine.InPPDirective)
- IndentForLevel[TheLine.Level] = LevelIndent;
- } else if (TheLine.ChildrenAffected) {
- format(TheLine.Children, DryRun);
- } else {
- // Format the first token if necessary, and notify the WhitespaceManager
- // about the unchanged whitespace.
- for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
- if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
- unsigned LevelIndent = Tok->OriginalColumn;
- if (!DryRun) {
- // Remove trailing whitespace of the previous line.
- if ((PreviousLine && PreviousLine->Affected) ||
- TheLine.LeadingEmptyLinesAffected) {
- formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
- TheLine.InPPDirective);
- } else {
- Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
- }
- }
-
- if (static_cast<int>(LevelIndent) - Offset >= 0)
- LevelIndent -= Offset;
- if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
- IndentForLevel[TheLine.Level] = LevelIndent;
- } else if (!DryRun) {
+ // Notify the WhitespaceManager about the unchanged whitespace.
+ for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
- }
- }
- }
- if (!DryRun) {
- for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
- Tok->Finalized = true;
}
+ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
}
- PreviousLine = *I;
+ if (!DryRun)
+ markFinalized(TheLine.First);
+ PreviousLine = &TheLine;
}
PenaltyCache[CacheKey] = Penalty;
return Penalty;
}
-unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line,
- unsigned FirstIndent, bool DryRun) {
- LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
-
- // If the ObjC method declaration does not fit on a line, we should format
- // it with one arg per line.
- if (State.Line->Type == LT_ObjCMethodDecl)
- State.Stack.back().BreakBeforeParameter = true;
-
- // Find best solution in solution space.
- return analyzeSolutionSpace(State, DryRun);
-}
-
void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
const AnnotatedLine *PreviousLine,
unsigned IndentLevel,
unsigned Indent,
bool InPPDirective) {
+ if (RootToken.is(tok::eof)) {
+ unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
+ Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
+ /*Spaces=*/0, /*TargetColumn=*/0);
+ return;
+ }
unsigned Newlines =
std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
// Remove empty lines before "}" where applicable.
@@ -496,7 +919,8 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
++Newlines;
// Remove empty lines after access specifiers.
- if (PreviousLine && PreviousLine->First->isAccessSpecifier())
+ if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
+ (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
Newlines = std::min(1u, Newlines);
Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
@@ -504,202 +928,21 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
!RootToken.HasUnescapedNewline);
}
-/// \brief Get the indent of \p Level from \p IndentForLevel.
-///
-/// \p IndentForLevel must contain the indent for the level \c l
-/// at \p IndentForLevel[l], or a value < 0 if the indent for
-/// that level is unknown.
-unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel,
- unsigned Level) {
- if (IndentForLevel[Level] != -1)
- return IndentForLevel[Level];
- if (Level == 0)
- return 0;
- return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
-}
-
-void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) {
- assert(!A.Last->Next);
- assert(!B.First->Previous);
- if (B.Affected)
- A.Affected = true;
- A.Last->Next = B.First;
- B.First->Previous = A.Last;
- B.First->CanBreakBefore = true;
- unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
- for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
- Tok->TotalLength += LengthA;
- A.Last = Tok;
- }
-}
-
-unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState,
- bool DryRun) {
- std::set<LineState *, CompareLineStatePointers> Seen;
-
- // Increasing count of \c StateNode items we have created. This is used to
- // create a deterministic order independent of the container.
- unsigned Count = 0;
- QueueType Queue;
-
- // Insert start element into queue.
- StateNode *Node =
- new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
- Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
- ++Count;
-
- unsigned Penalty = 0;
-
- // While not empty, take first element and follow edges.
- while (!Queue.empty()) {
- Penalty = Queue.top().first.first;
- StateNode *Node = Queue.top().second;
- if (!Node->State.NextToken) {
- DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
- break;
- }
- Queue.pop();
-
- // Cut off the analysis of certain solutions if the analysis gets too
- // complex. See description of IgnoreStackForComparison.
- if (Count > 10000)
- Node->State.IgnoreStackForComparison = true;
-
- if (!Seen.insert(&Node->State).second)
- // State already examined with lower penalty.
- continue;
-
- FormatDecision LastFormat = Node->State.NextToken->Decision;
- if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
- addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
- if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
- addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
- }
-
- if (Queue.empty()) {
- // We were unable to find a solution, do nothing.
- // FIXME: Add diagnostic?
- DEBUG(llvm::dbgs() << "Could not find a solution.\n");
- return 0;
- }
-
- // Reconstruct the solution.
- if (!DryRun)
- reconstructPath(InitialState, Queue.top().second);
-
- DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
- DEBUG(llvm::dbgs() << "---\n");
-
- return Penalty;
-}
-
-#ifndef NDEBUG
-static void printLineState(const LineState &State) {
- llvm::dbgs() << "State: ";
- for (const ParenState &P : State.Stack) {
- llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
- << " ";
- }
- llvm::dbgs() << State.NextToken->TokenText << "\n";
-}
-#endif
-
-void UnwrappedLineFormatter::reconstructPath(LineState &State,
- StateNode *Current) {
- std::deque<StateNode *> Path;
- // We do not need a break before the initial token.
- while (Current->Previous) {
- Path.push_front(Current);
- Current = Current->Previous;
- }
- for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
- I != E; ++I) {
- unsigned Penalty = 0;
- formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
- Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
-
- DEBUG({
- printLineState((*I)->Previous->State);
- if ((*I)->NewLine) {
- llvm::dbgs() << "Penalty for placing "
- << (*I)->Previous->State.NextToken->Tok.getName() << ": "
- << Penalty << "\n";
- }
- });
- }
-}
-
-void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty,
- StateNode *PreviousNode,
- bool NewLine, unsigned *Count,
- QueueType *Queue) {
- if (NewLine && !Indenter->canBreak(PreviousNode->State))
- return;
- if (!NewLine && Indenter->mustBreak(PreviousNode->State))
- return;
-
- StateNode *Node = new (Allocator.Allocate())
- StateNode(PreviousNode->State, NewLine, PreviousNode);
- if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
- return;
-
- Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
-
- Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
- ++(*Count);
-}
-
-bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine,
- bool DryRun, unsigned &Penalty) {
- FormatToken &Previous = *State.NextToken->Previous;
- const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
- if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block ||
- Previous.Children.size() == 0)
- // The previous token does not open a block. Nothing to do. We don't
- // assert so that we can simply call this function for all tokens.
- return true;
-
- if (NewLine) {
- int AdditionalIndent = State.Stack.back().Indent -
- Previous.Children[0]->Level * Style.IndentWidth;
-
- Penalty += format(Previous.Children, DryRun, AdditionalIndent,
- /*FixBadIndentation=*/true);
- return true;
- }
-
- if (Previous.Children[0]->First->MustBreakBefore)
- return false;
-
- // Cannot merge multiple statements into a single line.
- if (Previous.Children.size() > 1)
- return false;
-
- // Cannot merge into one line if this line ends on a comment.
- if (Previous.is(tok::comment))
- return false;
-
- // We can't put the closing "}" on a line with a trailing comment.
- if (Previous.Children[0]->Last->isTrailingComment())
- return false;
-
- // If the child line exceeds the column limit, we wouldn't want to merge it.
- // We add +2 for the trailing " }".
- if (Style.ColumnLimit > 0 &&
- Previous.Children[0]->Last->TotalLength + State.Column + 2 >
- Style.ColumnLimit)
- return false;
-
- if (!DryRun) {
- Whitespaces->replaceWhitespace(
- *Previous.Children[0]->First,
- /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
- /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
- }
- Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
-
- State.Column += 1 + Previous.Children[0]->Last->TotalLength;
- return true;
+unsigned
+UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
+ const AnnotatedLine *NextLine) const {
+ // In preprocessor directives reserve two chars for trailing " \" if the
+ // next line continues the preprocessor directive.
+ bool ContinuesPPDirective =
+ InPPDirective &&
+ // If there is no next line, this is likely a child line and the parent
+ // continues the preprocessor directive.
+ (!NextLine ||
+ (NextLine->InPPDirective &&
+ // If there is an unescaped newline between this line and the next, the
+ // next line starts a new preprocessor directive.
+ !NextLine->First->HasUnescapedNewline));
+ return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
}
} // namespace format
diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 3ae6dbc4db0b3..da9aa1c605e4c 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h
@@ -32,135 +32,39 @@ class UnwrappedLineFormatter {
public:
UnwrappedLineFormatter(ContinuationIndenter *Indenter,
WhitespaceManager *Whitespaces,
- const FormatStyle &Style)
- : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style) {}
+ const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ bool *IncompleteFormat)
+ : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
+ Keywords(Keywords), IncompleteFormat(IncompleteFormat) {}
- unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
- int AdditionalIndent = 0, bool FixBadIndentation = false);
+ /// \brief Format the current block and return the penalty.
+ unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,
+ bool DryRun = false, int AdditionalIndent = 0,
+ bool FixBadIndentation = false);
private:
- /// \brief Formats an \c AnnotatedLine and returns the penalty.
- ///
- /// If \p DryRun is \c false, directly applies the changes.
- unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
- bool DryRun);
-
- /// \brief An edge in the solution space from \c Previous->State to \c State,
- /// inserting a newline dependent on the \c NewLine.
- struct StateNode {
- StateNode(const LineState &State, bool NewLine, StateNode *Previous)
- : State(State), NewLine(NewLine), Previous(Previous) {}
- LineState State;
- bool NewLine;
- StateNode *Previous;
- };
-
- /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
- ///
- /// In case of equal penalties, we want to prefer states that were inserted
- /// first. During state generation we make sure that we insert states first
- /// that break the line as late as possible.
- typedef std::pair<unsigned, unsigned> OrderedPenalty;
-
- /// \brief An item in the prioritized BFS search queue. The \c StateNode's
- /// \c State has the given \c OrderedPenalty.
- typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
-
- /// \brief The BFS queue type.
- typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
- std::greater<QueueItem> > QueueType;
-
- /// \brief Get the offset of the line relatively to the level.
- ///
- /// For example, 'public:' labels in classes are offset by 1 or 2
- /// characters to the left from their level.
- int getIndentOffset(const FormatToken &RootToken) {
- if (Style.Language == FormatStyle::LK_Java)
- return 0;
- if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
- return Style.AccessModifierOffset;
- return 0;
- }
-
/// \brief Add a new line and the required indent before the first Token
/// of the \c UnwrappedLine if there was no structural parsing error.
void formatFirstToken(FormatToken &RootToken,
const AnnotatedLine *PreviousLine, unsigned IndentLevel,
unsigned Indent, bool InPPDirective);
- /// \brief Get the indent of \p Level from \p IndentForLevel.
- ///
- /// \p IndentForLevel must contain the indent for the level \c l
- /// at \p IndentForLevel[l], or a value < 0 if the indent for
- /// that level is unknown.
- unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level);
-
- void join(AnnotatedLine &A, const AnnotatedLine &B);
-
- unsigned getColumnLimit(bool InPPDirective) const {
- // In preprocessor directives reserve two chars for trailing " \"
- return Style.ColumnLimit - (InPPDirective ? 2 : 0);
- }
-
- struct CompareLineStatePointers {
- bool operator()(LineState *obj1, LineState *obj2) const {
- return *obj1 < *obj2;
- }
- };
-
- /// \brief Analyze the entire solution space starting from \p InitialState.
- ///
- /// This implements a variant of Dijkstra's algorithm on the graph that spans
- /// the solution space (\c LineStates are the nodes). The algorithm tries to
- /// find the shortest path (the one with lowest penalty) from \p InitialState
- /// to a state where all tokens are placed. Returns the penalty.
- ///
- /// If \p DryRun is \c false, directly applies the changes.
- unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false);
-
- void reconstructPath(LineState &State, StateNode *Current);
-
- /// \brief Add the following state to the analysis queue \c Queue.
- ///
- /// Assume the current state is \p PreviousNode and has been reached with a
- /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
- void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
- bool NewLine, unsigned *Count, QueueType *Queue);
-
- /// \brief If the \p State's next token is an r_brace closing a nested block,
- /// format the nested block before it.
- ///
- /// Returns \c true if all children could be placed successfully and adapts
- /// \p Penalty as well as \p State. If \p DryRun is false, also directly
- /// creates changes using \c Whitespaces.
- ///
- /// The crucial idea here is that children always get formatted upon
- /// encountering the closing brace right after the nested block. Now, if we
- /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
- /// \c false), the entire block has to be kept on the same line (which is only
- /// possible if it fits on the line, only contains a single statement, etc.
- ///
- /// If \p NewLine is true, we format the nested block on separate lines, i.e.
- /// break after the "{", format all lines with correct indentation and the put
- /// the closing "}" on yet another new line.
- ///
- /// This enables us to keep the simple structure of the
- /// \c UnwrappedLineFormatter, where we only have two options for each token:
- /// break or don't break.
- bool formatChildren(LineState &State, bool NewLine, bool DryRun,
- unsigned &Penalty);
-
- ContinuationIndenter *Indenter;
- WhitespaceManager *Whitespaces;
- FormatStyle Style;
-
- llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+ /// \brief Returns the column limit for a line, taking into account whether we
+ /// need an escaped newline due to a continued preprocessor directive.
+ unsigned getColumnLimit(bool InPPDirective, const AnnotatedLine *NextLine) const;
// Cache to store the penalty of formatting a vector of AnnotatedLines
// starting from a specific additional offset. Improves performance if there
// are many nested blocks.
std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
unsigned> PenaltyCache;
+
+ ContinuationIndenter *Indenter;
+ WhitespaceManager *Whitespaces;
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ bool *IncompleteFormat;
};
} // end namespace format
} // end namespace clang
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index ec04af5231be7..939528fbffe55 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//
#include "UnwrappedLineParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "format-parser"
@@ -56,22 +58,20 @@ private:
class ScopedMacroState : public FormatTokenSource {
public:
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
- FormatToken *&ResetToken, bool &StructuralError)
+ FormatToken *&ResetToken)
: Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
- StructuralError(StructuralError),
- PreviousStructuralError(StructuralError), Token(nullptr) {
+ Token(nullptr) {
TokenSource = this;
Line.Level = 0;
Line.InPPDirective = true;
}
- ~ScopedMacroState() {
+ ~ScopedMacroState() override {
TokenSource = PreviousTokenSource;
ResetToken = Token;
Line.InPPDirective = false;
Line.Level = PreviousLineLevel;
- StructuralError = PreviousStructuralError;
}
FormatToken *getNextToken() override {
@@ -110,8 +110,6 @@ private:
FormatToken *&ResetToken;
unsigned PreviousLineLevel;
FormatTokenSource *PreviousTokenSource;
- bool &StructuralError;
- bool PreviousStructuralError;
FormatToken *Token;
};
@@ -206,9 +204,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
- CurrentLines(&Lines), StructuralError(false), Style(Style),
- Keywords(Keywords), Tokens(nullptr), Callback(Callback),
- AllTokens(Tokens), PPBranchLevel(-1) {}
+ CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+ Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
void UnwrappedLineParser::reset() {
PPBranchLevel = -1;
@@ -219,11 +216,10 @@ void UnwrappedLineParser::reset() {
PreprocessorDirectives.clear();
CurrentLines = &Lines;
DeclarationScopeStack.clear();
- StructuralError = false;
PPStack.clear();
}
-bool UnwrappedLineParser::parse() {
+void UnwrappedLineParser::parse() {
IndexedTokenSource TokenSource(AllTokens);
do {
DEBUG(llvm::dbgs() << "----\n");
@@ -256,13 +252,15 @@ bool UnwrappedLineParser::parse() {
}
} while (!PPLevelBranchIndex.empty());
- return StructuralError;
}
void UnwrappedLineParser::parseFile() {
- ScopedDeclarationState DeclarationState(
- *Line, DeclarationScopeStack,
- /*MustBeDeclaration=*/ !Line->InPPDirective);
+ // The top-level context in a file always has declarations, except for pre-
+ // processor directives and JavaScript files.
+ bool MustBeDeclaration =
+ !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ MustBeDeclaration);
parseLevel(/*HasOpeningBrace=*/false);
// Make sure to format the remaining tokens.
flushComments(true);
@@ -286,7 +284,6 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
case tok::r_brace:
if (HasOpeningBrace)
return;
- StructuralError = true;
nextToken();
addUnwrappedLine();
break;
@@ -305,7 +302,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
} while (!eof());
}
-void UnwrappedLineParser::calculateBraceTypes() {
+void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
// We'll parse forward through the tokens until we hit
// a closing brace or eof - note that getNextToken() will
// parse macros, so this will magically work inside macro
@@ -328,6 +325,7 @@ void UnwrappedLineParser::calculateBraceTypes() {
switch (Tok->Tok.getKind()) {
case tok::l_brace:
+ Tok->BlockKind = BK_Unknown;
LBraceStack.push_back(Tok);
break;
case tok::r_brace:
@@ -351,9 +349,11 @@ void UnwrappedLineParser::calculateBraceTypes() {
//
// We exclude + and - as they can be ObjC visibility modifiers.
ProbablyBracedList =
- NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon,
+ NextTok->isOneOf(tok::comma, tok::period, tok::colon,
tok::r_paren, tok::r_square, tok::l_brace,
tok::l_paren, tok::ellipsis) ||
+ (NextTok->is(tok::semi) &&
+ (!ExpectClassBody || LBraceStack.size() != 1)) ||
(NextTok->isBinaryOperator() && !NextIsObjCMethod);
}
if (ProbablyBracedList) {
@@ -374,6 +374,7 @@ void UnwrappedLineParser::calculateBraceTypes() {
case tok::kw_for:
case tok::kw_switch:
case tok::kw_try:
+ case tok::kw___try:
if (!LBraceStack.empty())
LBraceStack.back()->BlockKind = BK_Block;
break;
@@ -407,7 +408,6 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
if (!FormatTok->Tok.is(tok::r_brace)) {
Line->Level = InitialLevel;
- StructuralError = true;
return;
}
@@ -417,7 +417,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
Line->Level = InitialLevel;
}
-static bool IsGoogScope(const UnwrappedLine &Line) {
+static bool isGoogScope(const UnwrappedLine &Line) {
// FIXME: Closure-library specific stuff should not be hard-coded but be
// configurable.
if (Line.Tokens.size() < 4)
@@ -453,12 +453,13 @@ void UnwrappedLineParser::parseChildBlock() {
nextToken();
{
bool GoogScope =
- Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line);
+ Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
ScopedLineState LineState(*this);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
/*MustBeDeclaration=*/false);
Line->Level += GoogScope ? 0 : 1;
parseLevel(/*HasOpeningBrace=*/true);
+ flushComments(isOnNewLine(*FormatTok));
Line->Level -= GoogScope ? 0 : 1;
}
nextToken();
@@ -466,7 +467,7 @@ void UnwrappedLineParser::parseChildBlock() {
void UnwrappedLineParser::parsePPDirective() {
assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
- ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
+ ScopedMacroState MacroState(*Line, Tokens, FormatTok);
nextToken();
if (!FormatTok->Tok.getIdentifierInfo()) {
@@ -549,6 +550,7 @@ void UnwrappedLineParser::conditionalCompilationEnd() {
void UnwrappedLineParser::parsePPIf(bool IfDef) {
nextToken();
bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
+ FormatTok->Tok.getLiteralData() != nullptr &&
StringRef(FormatTok->Tok.getLiteralData(),
FormatTok->Tok.getLength()) == "0") ||
FormatTok->Tok.is(tok::kw_false);
@@ -602,7 +604,7 @@ void UnwrappedLineParser::parsePPUnknown() {
// Here we blacklist certain tokens that are not usually the first token in an
// unwrapped line. This is used in attempt to distinguish macro calls without
// trailing semicolons from other constructs split to several lines.
-bool tokenCanStartNewLine(clang::Token Tok) {
+static bool tokenCanStartNewLine(const clang::Token &Tok) {
// Semicolon can be a null-statement, l_square can be a start of a macro or
// a C++11 attribute, but this doesn't seem to be common.
return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
@@ -655,6 +657,11 @@ void UnwrappedLineParser::parseStructuralElement() {
nextToken();
addUnwrappedLine();
return;
+ case tok::objc_try:
+ // This branch isn't strictly necessary (the kw_try case below would
+ // do this too after the tok::at is parsed above). But be explicit.
+ parseTryCatch();
+ return;
default:
break;
}
@@ -662,10 +669,13 @@ void UnwrappedLineParser::parseStructuralElement() {
case tok::kw_asm:
nextToken();
if (FormatTok->is(tok::l_brace)) {
+ FormatTok->Type = TT_InlineASMBrace;
nextToken();
while (FormatTok && FormatTok->isNot(tok::eof)) {
if (FormatTok->is(tok::r_brace)) {
+ FormatTok->Type = TT_InlineASMBrace;
nextToken();
+ addUnwrappedLine();
break;
}
FormatTok->Finalized = true;
@@ -686,7 +696,8 @@ void UnwrappedLineParser::parseStructuralElement() {
case tok::kw_public:
case tok::kw_protected:
case tok::kw_private:
- if (Style.Language == FormatStyle::LK_Java)
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript)
nextToken();
else
parseAccessSpecifier();
@@ -712,6 +723,7 @@ void UnwrappedLineParser::parseStructuralElement() {
parseCaseLabel();
return;
case tok::kw_try:
+ case tok::kw___try:
parseTryCatch();
return;
case tok::kw_extern:
@@ -725,11 +737,30 @@ void UnwrappedLineParser::parseStructuralElement() {
}
}
break;
+ case tok::kw_export:
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ parseJavaScriptEs6ImportExport();
+ return;
+ }
+ break;
case tok::identifier:
- if (FormatTok->IsForEachMacro) {
+ if (FormatTok->is(TT_ForEachMacro)) {
parseForOrWhileLoop();
return;
}
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->is(Keywords.kw_import)) {
+ parseJavaScriptEs6ImportExport();
+ return;
+ }
+ if (FormatTok->is(Keywords.kw_signals)) {
+ nextToken();
+ if (FormatTok->is(tok::colon)) {
+ nextToken();
+ addUnwrappedLine();
+ }
+ return;
+ }
// In all other cases, parse the declaration.
break;
default:
@@ -806,26 +837,42 @@ void UnwrappedLineParser::parseStructuralElement() {
parseTryCatch();
return;
case tok::identifier: {
- StringRef Text = FormatTok->TokenText;
// Parse function literal unless 'function' is the first token in a line
// in which case this should be treated as a free-standing function.
- if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" &&
- Line->Tokens.size() > 0) {
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
tryToParseJSFunction();
break;
}
+ if ((Style.Language == FormatStyle::LK_JavaScript ||
+ Style.Language == FormatStyle::LK_Java) &&
+ FormatTok->is(Keywords.kw_interface)) {
+ parseRecord();
+ break;
+ }
+
+ StringRef Text = FormatTok->TokenText;
nextToken();
- if (Line->Tokens.size() == 1) {
- if (FormatTok->Tok.is(tok::colon)) {
+ if (Line->Tokens.size() == 1 &&
+ // JS doesn't have macros, and within classes colons indicate fields,
+ // not labels.
+ Style.Language != FormatStyle::LK_JavaScript) {
+ if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
parseLabel();
return;
}
// Recognize function-like macro usages without trailing semicolon as
- // well as free-standing macrose like Q_OBJECT.
+ // well as free-standing macros like Q_OBJECT.
bool FunctionLike = FormatTok->is(tok::l_paren);
if (FunctionLike)
parseParens();
- if (FormatTok->NewlinesBefore > 0 &&
+
+ bool FollowedByNewline =
+ CommentsBeforeNextToken.empty()
+ ? FormatTok->NewlinesBefore > 0
+ : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
+
+ if (FollowedByNewline &&
(Text.size() >= 5 || FunctionLike) &&
tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
addUnwrappedLine();
@@ -835,6 +882,17 @@ void UnwrappedLineParser::parseStructuralElement() {
break;
}
case tok::equal:
+ // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
+ // TT_JsFatArrow. They always start an expression or a child block if
+ // followed by a curly.
+ if (FormatTok->is(TT_JsFatArrow)) {
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ parseChildBlock();
+ }
+ break;
+ }
+
nextToken();
if (FormatTok->Tok.is(tok::l_brace)) {
parseBracedList();
@@ -843,6 +901,9 @@ void UnwrappedLineParser::parseStructuralElement() {
case tok::l_square:
parseSquare();
break;
+ case tok::kw_new:
+ parseNew();
+ break;
default:
nextToken();
break;
@@ -952,22 +1013,48 @@ void UnwrappedLineParser::tryToParseJSFunction() {
// Consume function name.
if (FormatTok->is(tok::identifier))
- nextToken();
+ nextToken();
if (FormatTok->isNot(tok::l_paren))
return;
- nextToken();
- while (FormatTok->isNot(tok::l_brace)) {
- // Err on the side of caution in order to avoid consuming the full file in
- // case of incomplete code.
- if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
- tok::comment))
- return;
+
+ // Parse formal parameter list.
+ parseBalanced(tok::l_paren, tok::r_paren);
+
+ if (FormatTok->is(tok::colon)) {
+ // Parse a type definition.
nextToken();
+
+ // Eat the type declaration. For braced inline object types, balance braces,
+ // otherwise just parse until finding an l_brace for the function body.
+ if (FormatTok->is(tok::l_brace)) {
+ parseBalanced(tok::l_brace, tok::r_brace);
+ } else {
+ while(FormatTok->isNot(tok::l_brace) && !eof()) {
+ nextToken();
+ }
+ }
}
+
parseChildBlock();
}
+void UnwrappedLineParser::parseBalanced(tok::TokenKind OpenKind,
+ tok::TokenKind CloseKind) {
+ assert(FormatTok->is(OpenKind));
+ nextToken();
+ int Depth = 1;
+ while (Depth > 0 && !eof()) {
+ // Track the nesting depth of the OpenKind/CloseKind token pair.
+ if (FormatTok->is(OpenKind)) {
+ ++Depth;
+ } else if (FormatTok->is(CloseKind)) {
+ --Depth;
+ }
+ nextToken();
+ }
+}
+
bool UnwrappedLineParser::tryToParseBracedList() {
if (FormatTok->BlockKind == BK_Unknown)
calculateBraceTypes();
@@ -985,10 +1072,19 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
// FIXME: Once we have an expression parser in the UnwrappedLineParser,
// replace this by using parseAssigmentExpression() inside.
do {
- if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->is(Keywords.kw_function)) {
- tryToParseJSFunction();
- continue;
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (FormatTok->is(Keywords.kw_function)) {
+ tryToParseJSFunction();
+ continue;
+ } else if (FormatTok->is(TT_JsFatArrow)) {
+ nextToken();
+ // Fat arrows can be followed by simple expressions or by child blocks
+ // in curly braces.
+ if (FormatTok->is(tok::l_brace)){
+ parseChildBlock();
+ continue;
+ }
+ }
}
switch (FormatTok->Tok.getKind()) {
case tok::caret:
@@ -1006,6 +1102,17 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
FormatTok->BlockKind = BK_BracedInit;
parseBracedList();
break;
+ case tok::r_paren:
+ // JavaScript can just have free standing methods and getters/setters in
+ // object literals. Detect them by a "{" following ")".
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ nextToken();
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ break;
+ }
+ nextToken();
+ break;
case tok::r_brace:
nextToken();
return !HasError;
@@ -1046,9 +1153,8 @@ void UnwrappedLineParser::parseParens() {
tryToParseLambda();
break;
case tok::l_brace:
- if (!tryToParseBracedList()) {
+ if (!tryToParseBracedList())
parseChildBlock();
- }
break;
case tok::at:
nextToken();
@@ -1088,9 +1194,8 @@ void UnwrappedLineParser::parseSquare() {
parseSquare();
break;
case tok::l_brace: {
- if (!tryToParseBracedList()) {
+ if (!tryToParseBracedList())
parseChildBlock();
- }
break;
}
case tok::at:
@@ -1148,7 +1253,7 @@ void UnwrappedLineParser::parseIfThenElse() {
}
void UnwrappedLineParser::parseTryCatch() {
- assert(FormatTok->is(tok::kw_try) && "'try' expected");
+ assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
nextToken();
bool NeedsUnwrappedLine = false;
if (FormatTok->is(tok::colon)) {
@@ -1158,8 +1263,6 @@ void UnwrappedLineParser::parseTryCatch() {
nextToken();
if (FormatTok->is(tok::l_paren))
parseParens();
- else
- StructuralError = true;
if (FormatTok->is(tok::comma))
nextToken();
}
@@ -1182,23 +1285,29 @@ void UnwrappedLineParser::parseTryCatch() {
// The C++ standard requires a compound-statement after a try.
// If there's none, we try to assume there's a structuralElement
// and try to continue.
- StructuralError = true;
addUnwrappedLine();
++Line->Level;
parseStructuralElement();
--Line->Level;
}
- while (FormatTok->is(tok::kw_catch) ||
- ((Style.Language == FormatStyle::LK_Java ||
- Style.Language == FormatStyle::LK_JavaScript) &&
- FormatTok->is(Keywords.kw_finally))) {
+ while (1) {
+ if (FormatTok->is(tok::at))
+ nextToken();
+ if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
+ tok::kw___finally) ||
+ ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ FormatTok->is(Keywords.kw_finally)) ||
+ (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
+ FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
+ break;
nextToken();
while (FormatTok->isNot(tok::l_brace)) {
if (FormatTok->is(tok::l_paren)) {
parseParens();
continue;
}
- if (FormatTok->isOneOf(tok::semi, tok::r_brace))
+ if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
return;
nextToken();
}
@@ -1242,9 +1351,33 @@ void UnwrappedLineParser::parseNamespace() {
// FIXME: Add error handling.
}
+void UnwrappedLineParser::parseNew() {
+ assert(FormatTok->is(tok::kw_new) && "'new' expected");
+ nextToken();
+ if (Style.Language != FormatStyle::LK_Java)
+ return;
+
+ // In Java, we can parse everything up to the parens, which aren't optional.
+ do {
+ // There should not be a ;, { or } before the new's open paren.
+ if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
+ return;
+
+ // Consume the parens.
+ if (FormatTok->is(tok::l_paren)) {
+ parseParens();
+
+ // If there is a class body of an anonymous class, consume that as child.
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ return;
+ }
+ nextToken();
+ } while (!eof());
+}
+
void UnwrappedLineParser::parseForOrWhileLoop() {
- assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) ||
- FormatTok->IsForEachMacro) &&
+ assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
"'for', 'while' or foreach macro expected");
nextToken();
if (FormatTok->Tok.is(tok::l_paren))
@@ -1304,6 +1437,8 @@ void UnwrappedLineParser::parseLabel() {
}
addUnwrappedLine();
} else {
+ if (FormatTok->is(tok::semi))
+ nextToken();
addUnwrappedLine();
}
Line->Level = OldLineLevel;
@@ -1338,8 +1473,7 @@ void UnwrappedLineParser::parseSwitch() {
void UnwrappedLineParser::parseAccessSpecifier() {
nextToken();
// Understand Qt's slots.
- if (FormatTok->is(tok::identifier) &&
- (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS"))
+ if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
nextToken();
// Otherwise, we don't know what it is, and we'd better keep the next token.
if (FormatTok->Tok.is(tok::colon))
@@ -1455,37 +1589,45 @@ void UnwrappedLineParser::parseJavaEnumBody() {
void UnwrappedLineParser::parseRecord() {
const FormatToken &InitialToken = *FormatTok;
nextToken();
- if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute,
- tok::kw___declspec, tok::kw_alignas)) {
+
+
+ // The actual identifier can be a nested name specifier, and in macros
+ // it is often token-pasted.
+ while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
+ tok::kw___attribute, tok::kw___declspec,
+ tok::kw_alignas) ||
+ ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ FormatTok->isOneOf(tok::period, tok::comma))) {
+ bool IsNonMacroIdentifier =
+ FormatTok->is(tok::identifier) &&
+ FormatTok->TokenText != FormatTok->TokenText.upper();
nextToken();
// We can have macros or attributes in between 'class' and the class name.
- if (FormatTok->Tok.is(tok::l_paren)) {
+ if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
parseParens();
- }
- // The actual identifier can be a nested name specifier, and in macros
- // it is often token-pasted.
- while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) ||
- FormatTok->is(tok::hashhash) ||
- (Style.Language == FormatStyle::LK_Java &&
- FormatTok->isOneOf(tok::period, tok::comma)))
- nextToken();
+ }
- // Note that parsing away template declarations here leads to incorrectly
- // accepting function declarations as record declarations.
- // In general, we cannot solve this problem. Consider:
- // class A<int> B() {}
- // which can be a function definition or a class definition when B() is a
- // macro. If we find enough real-world cases where this is a problem, we
- // can parse for the 'template' keyword in the beginning of the statement,
- // and thus rule out the record production in case there is no template
- // (this would still leave us with an ambiguity between template function
- // and class declarations).
- if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
- while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
- if (FormatTok->Tok.is(tok::semi))
- return;
- nextToken();
+ // Note that parsing away template declarations here leads to incorrectly
+ // accepting function declarations as record declarations.
+ // In general, we cannot solve this problem. Consider:
+ // class A<int> B() {}
+ // which can be a function definition or a class definition when B() is a
+ // macro. If we find enough real-world cases where this is a problem, we
+ // can parse for the 'template' keyword in the beginning of the statement,
+ // and thus rule out the record production in case there is no template
+ // (this would still leave us with an ambiguity between template function
+ // and class declarations).
+ if (FormatTok->isOneOf(tok::colon, tok::less)) {
+ while (!eof()) {
+ if (FormatTok->is(tok::l_brace)) {
+ calculateBraceTypes(/*ExpectClassBody=*/true);
+ if (!tryToParseBracedList())
+ break;
}
+ if (FormatTok->Tok.is(tok::semi))
+ return;
+ nextToken();
}
}
if (FormatTok->Tok.is(tok::l_brace)) {
@@ -1498,8 +1640,9 @@ void UnwrappedLineParser::parseRecord() {
// We fall through to parsing a structural element afterwards, so
// class A {} n, m;
// will end up in one unwrapped line.
- // This does not apply for Java.
- if (Style.Language == FormatStyle::LK_Java)
+ // This does not apply for Java and JavaScript.
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript)
addUnwrappedLine();
}
@@ -1578,6 +1721,35 @@ void UnwrappedLineParser::parseObjCProtocol() {
parseObjCUntilAtEnd();
}
+void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
+ assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
+ nextToken();
+
+ // Consume the "default" in "export default class/function".
+ if (FormatTok->is(tok::kw_default))
+ nextToken();
+
+ // Consume "function" and "default function", so that these get parsed as
+ // free-standing JS functions, i.e. do not require a trailing semicolon.
+ if (FormatTok->is(Keywords.kw_function)) {
+ nextToken();
+ return;
+ }
+
+ if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_var))
+ return; // Fall through to parsing the corresponding structure.
+
+ if (FormatTok->is(tok::l_brace)) {
+ FormatTok->BlockKind = BK_Block;
+ parseBracedList();
+ }
+
+ while (!eof() && FormatTok->isNot(tok::semi) &&
+ FormatTok->isNot(tok::l_brace)) {
+ nextToken();
+ }
+}
+
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
StringRef Prefix = "") {
llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
@@ -1634,14 +1806,12 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
I = CommentsBeforeNextToken.begin(),
E = CommentsBeforeNextToken.end();
I != E; ++I) {
- if (isOnNewLine(**I) && JustComments) {
+ if (isOnNewLine(**I) && JustComments)
addUnwrappedLine();
- }
pushToken(*I);
}
- if (NewlineBeforeNext && JustComments) {
+ if (NewlineBeforeNext && JustComments)
addUnwrappedLine();
- }
CommentsBeforeNextToken.clear();
}
@@ -1662,8 +1832,7 @@ void UnwrappedLineParser::readToken() {
(FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
// If there is an unfinished unwrapped line, we flush the preprocessor
// directives only after that unwrapped line was finished later.
- bool SwitchToPreprocessorLines =
- !Line->Tokens.empty() && CurrentLines == &Lines;
+ bool SwitchToPreprocessorLines = !Line->Tokens.empty();
ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
// Comments stored before the preprocessor directive need to be output
// before the preprocessor directive, at the same level as the
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 3218afecad309..6a6e56fea0280 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -65,8 +65,7 @@ public:
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback);
- /// Returns true in case of a structural error.
- bool parse();
+ void parse();
private:
void reset();
@@ -95,6 +94,7 @@ private:
void parseCaseLabel();
void parseSwitch();
void parseNamespace();
+ void parseNew();
void parseAccessSpecifier();
void parseEnum();
void parseJavaEnumBody();
@@ -103,16 +103,22 @@ private:
void parseObjCUntilAtEnd();
void parseObjCInterfaceOrImplementation();
void parseObjCProtocol();
+ void parseJavaScriptEs6ImportExport();
bool tryToParseLambda();
bool tryToParseLambdaIntroducer();
void tryToParseJSFunction();
+ /// \brief Parses tokens until encountering the CloseKind token, but balances
+ /// tokens when encountering more OpenKind tokens. Useful for e.g. parsing a
+ /// curly brace delimited block that can contain nested blocks.
+ /// The parser must be positioned on a token of OpenKind.
+ void parseBalanced(tok::TokenKind OpenKind, tok::TokenKind CloseKind);
void addUnwrappedLine();
bool eof() const;
void nextToken();
void readToken();
void flushComments(bool NewlineBeforeNext);
void pushToken(FormatToken *Tok);
- void calculateBraceTypes();
+ void calculateBraceTypes(bool ExpectClassBody = false);
// Marks a conditional compilation edge (for example, an '#if', '#ifdef',
// '#else' or merge conflict marker). If 'Unreachable' is true, assumes
@@ -156,10 +162,6 @@ private:
// whether we are in a compound statement or not.
std::vector<bool> DeclarationScopeStack;
- // Will be true if we encounter an error that leads to possibily incorrect
- // indentation levels.
- bool StructuralError;
-
const FormatStyle &Style;
const AdditionalKeywords &Keywords;
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index bf1207e59c902..4baaab1c9877f 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -36,7 +36,9 @@ WhitespaceManager::Change::Change(
PreviousLinePostfix(PreviousLinePostfix),
CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel),
- Spaces(Spaces) {}
+ Spaces(Spaces), IsTrailingComment(false), TokenLength(0),
+ PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
+ StartOfBlockComment(nullptr), IndentationOffset(0) {}
void WhitespaceManager::reset() {
Changes.clear();
@@ -91,6 +93,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() {
std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
calculateLineBreakInformation();
+ alignConsecutiveAssignments();
alignTrailingComments();
alignEscapedNewlines();
generateChanges();
@@ -139,6 +142,96 @@ void WhitespaceManager::calculateLineBreakInformation() {
}
}
+// Walk through all of the changes and find sequences of "=" to align. To do
+// so, keep track of the lines and whether or not an "=" was found on each. If
+// an "=" is found on a line, extend the current sequence. If the current line
+// cannot be part of a sequence, e.g. because there is an empty line before it
+// or it contains non-assignments, finalize the previous sequence.
+void WhitespaceManager::alignConsecutiveAssignments() {
+ if (!Style.AlignConsecutiveAssignments)
+ return;
+
+ unsigned MinColumn = 0;
+ unsigned StartOfSequence = 0;
+ unsigned EndOfSequence = 0;
+ bool FoundAssignmentOnLine = false;
+ bool FoundLeftParenOnLine = false;
+ unsigned CurrentLine = 0;
+
+ auto AlignSequence = [&] {
+ alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn);
+ MinColumn = 0;
+ StartOfSequence = 0;
+ EndOfSequence = 0;
+ };
+
+ for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ if (Changes[i].NewlinesBefore != 0) {
+ CurrentLine += Changes[i].NewlinesBefore;
+ if (StartOfSequence > 0 &&
+ (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine)) {
+ EndOfSequence = i;
+ AlignSequence();
+ }
+ FoundAssignmentOnLine = false;
+ FoundLeftParenOnLine = false;
+ }
+
+ if ((Changes[i].Kind == tok::equal &&
+ (FoundAssignmentOnLine || ((Changes[i].NewlinesBefore > 0 ||
+ Changes[i + 1].NewlinesBefore > 0)))) ||
+ (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren)) {
+ if (StartOfSequence > 0)
+ AlignSequence();
+ } else if (Changes[i].Kind == tok::l_paren) {
+ FoundLeftParenOnLine = true;
+ if (!FoundAssignmentOnLine && StartOfSequence > 0)
+ AlignSequence();
+ } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine &&
+ Changes[i].Kind == tok::equal) {
+ FoundAssignmentOnLine = true;
+ EndOfSequence = i;
+ if (StartOfSequence == 0)
+ StartOfSequence = i;
+
+ unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
+ MinColumn = std::max(MinColumn, ChangeMinColumn);
+ }
+ }
+
+ if (StartOfSequence > 0) {
+ EndOfSequence = Changes.size();
+ AlignSequence();
+ }
+}
+
+void WhitespaceManager::alignConsecutiveAssignments(unsigned Start,
+ unsigned End,
+ unsigned Column) {
+ bool AlignedAssignment = false;
+ int PreviousShift = 0;
+ for (unsigned i = Start; i != End; ++i) {
+ int Shift = 0;
+ if (Changes[i].NewlinesBefore > 0)
+ AlignedAssignment = false;
+ if (!AlignedAssignment && Changes[i].Kind == tok::equal) {
+ Shift = Column - Changes[i].StartOfTokenColumn;
+ AlignedAssignment = true;
+ PreviousShift = Shift;
+ }
+ assert(Shift >= 0);
+ Changes[i].Spaces += Shift;
+ if (i + 1 != Changes.size())
+ Changes[i + 1].PreviousEndOfTokenColumn += Shift;
+ Changes[i].StartOfTokenColumn += Shift;
+ if (AlignedAssignment) {
+ Changes[i].StartOfTokenColumn += PreviousShift;
+ if (i + 1 != Changes.size())
+ Changes[i + 1].PreviousEndOfTokenColumn += PreviousShift;
+ }
+ }
+}
+
void WhitespaceManager::alignTrailingComments() {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
@@ -264,6 +357,11 @@ void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
+ if (i > 0) {
+ assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
+ C.OriginalWhitespaceRange.getBegin() &&
+ "Generating two replacements for the same location");
+ }
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
if (C.ContinuesPPDirective)
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 28730d457eba8..4bfc813b2c349 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -164,6 +164,13 @@ private:
/// \c EscapedNewlineColumn for the first tokens or token parts in a line.
void calculateLineBreakInformation();
+ /// \brief Align consecutive assignments over all \c Changes.
+ void alignConsecutiveAssignments();
+
+ /// \brief Align consecutive assignments from change \p Start to change \p End at
+ /// the specified \p Column.
+ void alignConsecutiveAssignments(unsigned Start, unsigned End, unsigned Column);
+
/// \brief Align trailing comments over all \c Changes.
void alignTrailingComments();