diff options
Diffstat (limited to 'lib/Format/UnwrappedLineFormatter.cpp')
| -rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 941 | 
1 files changed, 592 insertions, 349 deletions
| diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index ca66e7351641..cbf8c6c92211 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -25,19 +25,152 @@ bool startsExternCBlock(const AnnotatedLine &Line) {           NextNext && NextNext->is(tok::l_brace);  } +/// \brief Tracks the indent level of \c AnnotatedLines across levels. +/// +/// \c nextLine must be called for each \c AnnotatedLine, after which \c +/// getIndent() will return the indent for the last line \c nextLine was called +/// with. +/// If the line is not formatted (and thus the indent does not change), calling +/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause +/// subsequent lines on the same level to be indented at the same level as the +/// given line. +class LevelIndentTracker { +public: +  LevelIndentTracker(const FormatStyle &Style, +                     const AdditionalKeywords &Keywords, unsigned StartLevel, +                     int AdditionalIndent) +      : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) { +    for (unsigned i = 0; i != StartLevel; ++i) +      IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); +  } + +  /// \brief Returns the indent for the current line. +  unsigned getIndent() const { return Indent; } + +  /// \brief Update the indent state given that \p Line is going to be formatted +  /// next. +  void nextLine(const AnnotatedLine &Line) { +    Offset = getIndentOffset(*Line.First); +    if (Line.InPPDirective) { +      Indent = Line.Level * Style.IndentWidth + AdditionalIndent; +    } else { +      while (IndentForLevel.size() <= Line.Level) +        IndentForLevel.push_back(-1); +      IndentForLevel.resize(Line.Level + 1); +      Indent = getIndent(IndentForLevel, Line.Level); +    } +    if (static_cast<int>(Indent) + Offset >= 0) +      Indent += Offset; +  } + +  /// \brief Update the level indent to adapt to the given \p Line. +  /// +  /// When a line is not formatted, we move the subsequent lines on the same +  /// level to the same indent. +  /// Note that \c nextLine must have been called before this method. +  void adjustToUnmodifiedLine(const AnnotatedLine &Line) { +    unsigned LevelIndent = Line.First->OriginalColumn; +    if (static_cast<int>(LevelIndent) - Offset >= 0) +      LevelIndent -= Offset; +    if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) && +        !Line.InPPDirective) +      IndentForLevel[Line.Level] = LevelIndent; +  } + +private: +  /// \brief Get the offset of the line relatively to the level. +  /// +  /// For example, 'public:' labels in classes are offset by 1 or 2 +  /// characters to the left from their level. +  int getIndentOffset(const FormatToken &RootToken) { +    if (Style.Language == FormatStyle::LK_Java || +        Style.Language == FormatStyle::LK_JavaScript) +      return 0; +    if (RootToken.isAccessSpecifier(false) || +        RootToken.isObjCAccessSpecifier() || +        (RootToken.is(Keywords.kw_signals) && RootToken.Next && +         RootToken.Next->is(tok::colon))) +      return Style.AccessModifierOffset; +    return 0; +  } + +  /// \brief Get the indent of \p Level from \p IndentForLevel. +  /// +  /// \p IndentForLevel must contain the indent for the level \c l +  /// at \p IndentForLevel[l], or a value < 0 if the indent for +  /// that level is unknown. +  unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { +    if (IndentForLevel[Level] != -1) +      return IndentForLevel[Level]; +    if (Level == 0) +      return 0; +    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; +  } + +  const FormatStyle &Style; +  const AdditionalKeywords &Keywords; +  const unsigned AdditionalIndent; + +  /// \brief The indent in characters for each level. +  std::vector<int> IndentForLevel; + +  /// \brief Offset of the current line relative to the indent level. +  /// +  /// For example, the 'public' keywords is often indented with a negative +  /// offset. +  int Offset = 0; + +  /// \brief The current line's indent. +  unsigned Indent = 0; +}; +  class LineJoiner {  public: -  LineJoiner(const FormatStyle &Style) : Style(Style) {} +  LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, +             const SmallVectorImpl<AnnotatedLine *> &Lines) +      : Style(Style), Keywords(Keywords), End(Lines.end()), +        Next(Lines.begin()) {} + +  /// \brief Returns the next line, merging multiple lines into one if possible. +  const AnnotatedLine *getNextMergedLine(bool DryRun, +                                         LevelIndentTracker &IndentTracker) { +    if (Next == End) +      return nullptr; +    const AnnotatedLine *Current = *Next; +    IndentTracker.nextLine(*Current); +    unsigned MergedLines = +        tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End); +    if (MergedLines > 0 && Style.ColumnLimit == 0) +      // Disallow line merging if there is a break at the start of one of the +      // input lines. +      for (unsigned i = 0; i < MergedLines; ++i) +        if (Next[i + 1]->First->NewlinesBefore > 0) +          MergedLines = 0; +    if (!DryRun) +      for (unsigned i = 0; i < MergedLines; ++i) +        join(*Next[i], *Next[i + 1]); +    Next = Next + MergedLines + 1; +    return Current; +  } +private:    /// \brief Calculates how many lines can be merged into 1 starting at \p I.    unsigned    tryFitMultipleLinesInOne(unsigned Indent,                             SmallVectorImpl<AnnotatedLine *>::const_iterator I,                             SmallVectorImpl<AnnotatedLine *>::const_iterator E) { +    // Can't join the last line with anything. +    if (I + 1 == E) +      return 0;      // We can never merge stuff if there are trailing line comments.      const AnnotatedLine *TheLine = *I;      if (TheLine->Last->is(TT_LineComment))        return 0; +    if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) +      return 0; +    if (TheLine->InPPDirective && +        (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) +      return 0;      if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)        return 0; @@ -50,9 +183,6 @@ public:                  ? 0                  : Limit - TheLine->Last->TotalLength; -    if (I + 1 == E || I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) -      return 0; -      // FIXME: TheLine->Level != 0 might or might not be the right check to do.      // If necessary, change to something smarter.      bool MergeShortFunctions = @@ -113,15 +243,12 @@ public:      return 0;    } -private:    unsigned    tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,                              SmallVectorImpl<AnnotatedLine *>::const_iterator E,                              unsigned Limit) {      if (Limit == 0)        return 0; -    if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline) -      return 0;      if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)        return 0;      if (1 + I[1]->Last->TotalLength > Limit) @@ -147,8 +274,8 @@ private:        return 0;      if (1 + I[1]->Last->TotalLength > Limit)        return 0; -    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, -                             tok::kw_while, TT_LineComment)) +    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, +                             TT_LineComment))        return 0;      // Only inline simple if's (no nested if or else).      if (I + 2 != E && Line.First->is(tok::kw_if) && @@ -157,9 +284,10 @@ private:      return 1;    } -  unsigned tryMergeShortCaseLabels( -      SmallVectorImpl<AnnotatedLine *>::const_iterator I, -      SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { +  unsigned +  tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I, +                          SmallVectorImpl<AnnotatedLine *>::const_iterator E, +                          unsigned Limit) {      if (Limit == 0 || I + 1 == E ||          I[1]->First->isOneOf(tok::kw_case, tok::kw_default))        return 0; @@ -191,16 +319,21 @@ private:      AnnotatedLine &Line = **I;      // Don't merge ObjC @ keywords and methods. +    // FIXME: If an option to allow short exception handling clauses on a single +    // line is added, change this to not return for @try and friends.      if (Style.Language != FormatStyle::LK_Java &&          Line.First->isOneOf(tok::at, tok::minus, tok::plus))        return 0;      // Check that the current line allows merging. This depends on whether we      // are in a control flow statements as well as several style flags. -    if (Line.First->isOneOf(tok::kw_else, tok::kw_case)) +    if (Line.First->isOneOf(tok::kw_else, tok::kw_case) || +        (Line.First->Next && Line.First->Next->is(tok::kw_else)))        return 0;      if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, -                            tok::kw_catch, tok::kw_for, tok::r_brace)) { +                            tok::kw___try, tok::kw_catch, tok::kw___finally, +                            tok::kw_for, tok::r_brace) || +        Line.First->is(Keywords.kw___except)) {        if (!Style.AllowShortBlocksOnASingleLine)          return 0;        if (!Style.AllowShortIfStatementsOnASingleLine && @@ -211,7 +344,11 @@ private:          return 0;        // FIXME: Consider an option to allow short exception handling clauses on        // a single line. -      if (Line.First->isOneOf(tok::kw_try, tok::kw_catch)) +      // FIXME: This isn't covered by tests. +      // FIXME: For catch, __except, __finally the first token on the line +      // is '}', so this isn't correct here. +      if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, +                              Keywords.kw___except, tok::kw___finally))          return 0;      } @@ -226,7 +363,8 @@ private:      } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) &&                 !startsExternCBlock(Line)) {        // We don't merge short records. -      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct)) +      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, +                              Keywords.kw_interface))          return 0;        // Check that we still have three lines and they fit into the limit. @@ -252,6 +390,10 @@ private:        if (Tok->isNot(tok::r_brace))          return 0; +      // Don't merge "if (a) { .. } else {". +      if (Tok->Next && Tok->Next->is(tok::kw_else)) +        return 0; +        return 2;      }      return 0; @@ -285,28 +427,367 @@ private:      return false;    } +  void join(AnnotatedLine &A, const AnnotatedLine &B) { +    assert(!A.Last->Next); +    assert(!B.First->Previous); +    if (B.Affected) +      A.Affected = true; +    A.Last->Next = B.First; +    B.First->Previous = A.Last; +    B.First->CanBreakBefore = true; +    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; +    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { +      Tok->TotalLength += LengthA; +      A.Last = Tok; +    } +  } +    const FormatStyle &Style; +  const AdditionalKeywords &Keywords; +  const SmallVectorImpl<AnnotatedLine*>::const_iterator End; + +  SmallVectorImpl<AnnotatedLine*>::const_iterator Next;  }; -class NoColumnLimitFormatter { +static void markFinalized(FormatToken *Tok) { +  for (; Tok; Tok = Tok->Next) { +    Tok->Finalized = true; +    for (AnnotatedLine *Child : Tok->Children) +      markFinalized(Child->First); +  } +} + +#ifndef NDEBUG +static void printLineState(const LineState &State) { +  llvm::dbgs() << "State: "; +  for (const ParenState &P : State.Stack) { +    llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent +                 << " "; +  } +  llvm::dbgs() << State.NextToken->TokenText << "\n"; +} +#endif + +/// \brief Base class for classes that format one \c AnnotatedLine. +class LineFormatter {  public: -  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {} +  LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, +                const FormatStyle &Style, +                UnwrappedLineFormatter *BlockFormatter) +      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), +        BlockFormatter(BlockFormatter) {} +  virtual ~LineFormatter() {} + +  /// \brief Formats an \c AnnotatedLine and returns the penalty. +  /// +  /// If \p DryRun is \c false, directly applies the changes. +  virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, +                              bool DryRun) = 0; + +protected: +  /// \brief If the \p State's next token is an r_brace closing a nested block, +  /// format the nested block before it. +  /// +  /// Returns \c true if all children could be placed successfully and adapts +  /// \p Penalty as well as \p State. If \p DryRun is false, also directly +  /// creates changes using \c Whitespaces. +  /// +  /// The crucial idea here is that children always get formatted upon +  /// encountering the closing brace right after the nested block. Now, if we +  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is +  /// \c false), the entire block has to be kept on the same line (which is only +  /// possible if it fits on the line, only contains a single statement, etc. +  /// +  /// If \p NewLine is true, we format the nested block on separate lines, i.e. +  /// break after the "{", format all lines with correct indentation and the put +  /// the closing "}" on yet another new line. +  /// +  /// This enables us to keep the simple structure of the +  /// \c UnwrappedLineFormatter, where we only have two options for each token: +  /// break or don't break. +  bool formatChildren(LineState &State, bool NewLine, bool DryRun, +                      unsigned &Penalty) { +    const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); +    FormatToken &Previous = *State.NextToken->Previous; +    if (!LBrace || LBrace->isNot(tok::l_brace) || +        LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) +      // The previous token does not open a block. Nothing to do. We don't +      // assert so that we can simply call this function for all tokens. +      return true; + +    if (NewLine) { +      int AdditionalIndent = State.Stack.back().Indent - +                             Previous.Children[0]->Level * Style.IndentWidth; + +      Penalty += +          BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, +                                 /*FixBadIndentation=*/true); +      return true; +    } + +    if (Previous.Children[0]->First->MustBreakBefore) +      return false; + +    // Cannot merge multiple statements into a single line. +    if (Previous.Children.size() > 1) +      return false; + +    // Cannot merge into one line if this line ends on a comment. +    if (Previous.is(tok::comment)) +      return false; + +    // We can't put the closing "}" on a line with a trailing comment. +    if (Previous.Children[0]->Last->isTrailingComment()) +      return false; + +    // If the child line exceeds the column limit, we wouldn't want to merge it. +    // We add +2 for the trailing " }". +    if (Style.ColumnLimit > 0 && +        Previous.Children[0]->Last->TotalLength + State.Column + 2 > +            Style.ColumnLimit) +      return false; + +    if (!DryRun) { +      Whitespaces->replaceWhitespace( +          *Previous.Children[0]->First, +          /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, +          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); +    } +    Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun); + +    State.Column += 1 + Previous.Children[0]->Last->TotalLength; +    return true; +  } + +  ContinuationIndenter *Indenter; + +private: +  WhitespaceManager *Whitespaces; +  const FormatStyle &Style; +  UnwrappedLineFormatter *BlockFormatter; +}; -  /// \brief Formats the line starting at \p State, simply keeping all of the -  /// input's line breaking decisions. -  void format(unsigned FirstIndent, const AnnotatedLine *Line) { +/// \brief Formatter that keeps the existing line breaks. +class NoColumnLimitLineFormatter : public LineFormatter { +public: +  NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, +                             WhitespaceManager *Whitespaces, +                             const FormatStyle &Style, +                             UnwrappedLineFormatter *BlockFormatter) +      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + +  /// \brief Formats the line, simply keeping all of the input's line breaking +  /// decisions. +  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, +                      bool DryRun) override { +    assert(!DryRun);      LineState State = -        Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false); +        Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);      while (State.NextToken) {        bool Newline =            Indenter->mustBreak(State) ||            (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); +      unsigned Penalty = 0; +      formatChildren(State, Newline, /*DryRun=*/false, Penalty);        Indenter->addTokenToState(State, Newline, /*DryRun=*/false);      } +    return 0; +  } +}; + +/// \brief Formatter that puts all tokens into a single line without breaks. +class NoLineBreakFormatter : public LineFormatter { +public: +  NoLineBreakFormatter(ContinuationIndenter *Indenter, +                       WhitespaceManager *Whitespaces, const FormatStyle &Style, +                       UnwrappedLineFormatter *BlockFormatter) +      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + +  /// \brief Puts all tokens into a single line. +  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, +                      bool DryRun) { +    unsigned Penalty = 0; +    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); +    while (State.NextToken) { +      formatChildren(State, /*Newline=*/false, DryRun, Penalty); +      Indenter->addTokenToState(State, /*Newline=*/false, DryRun); +    } +    return Penalty; +  } +}; + +/// \brief Finds the best way to break lines. +class OptimizingLineFormatter : public LineFormatter { +public: +  OptimizingLineFormatter(ContinuationIndenter *Indenter, +                          WhitespaceManager *Whitespaces, +                          const FormatStyle &Style, +                          UnwrappedLineFormatter *BlockFormatter) +      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} + +  /// \brief Formats the line by finding the best line breaks with line lengths +  /// below the column limit. +  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, +                      bool DryRun) { +    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + +    // If the ObjC method declaration does not fit on a line, we should format +    // it with one arg per line. +    if (State.Line->Type == LT_ObjCMethodDecl) +      State.Stack.back().BreakBeforeParameter = true; + +    // Find best solution in solution space. +    return analyzeSolutionSpace(State, DryRun);    }  private: -  ContinuationIndenter *Indenter; +  struct CompareLineStatePointers { +    bool operator()(LineState *obj1, LineState *obj2) const { +      return *obj1 < *obj2; +    } +  }; + +  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. +  /// +  /// In case of equal penalties, we want to prefer states that were inserted +  /// first. During state generation we make sure that we insert states first +  /// that break the line as late as possible. +  typedef std::pair<unsigned, unsigned> OrderedPenalty; + +  /// \brief An edge in the solution space from \c Previous->State to \c State, +  /// inserting a newline dependent on the \c NewLine. +  struct StateNode { +    StateNode(const LineState &State, bool NewLine, StateNode *Previous) +        : State(State), NewLine(NewLine), Previous(Previous) {} +    LineState State; +    bool NewLine; +    StateNode *Previous; +  }; + +  /// \brief An item in the prioritized BFS search queue. The \c StateNode's +  /// \c State has the given \c OrderedPenalty. +  typedef std::pair<OrderedPenalty, StateNode *> QueueItem; + +  /// \brief The BFS queue type. +  typedef std::priority_queue<QueueItem, std::vector<QueueItem>, +                              std::greater<QueueItem>> QueueType; + +  /// \brief Analyze the entire solution space starting from \p InitialState. +  /// +  /// This implements a variant of Dijkstra's algorithm on the graph that spans +  /// the solution space (\c LineStates are the nodes). The algorithm tries to +  /// find the shortest path (the one with lowest penalty) from \p InitialState +  /// to a state where all tokens are placed. Returns the penalty. +  /// +  /// If \p DryRun is \c false, directly applies the changes. +  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) { +    std::set<LineState *, CompareLineStatePointers> Seen; + +    // Increasing count of \c StateNode items we have created. This is used to +    // create a deterministic order independent of the container. +    unsigned Count = 0; +    QueueType Queue; + +    // Insert start element into queue. +    StateNode *Node = +        new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); +    Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); +    ++Count; + +    unsigned Penalty = 0; + +    // While not empty, take first element and follow edges. +    while (!Queue.empty()) { +      Penalty = Queue.top().first.first; +      StateNode *Node = Queue.top().second; +      if (!Node->State.NextToken) { +        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); +        break; +      } +      Queue.pop(); + +      // Cut off the analysis of certain solutions if the analysis gets too +      // complex. See description of IgnoreStackForComparison. +      if (Count > 10000) +        Node->State.IgnoreStackForComparison = true; + +      if (!Seen.insert(&Node->State).second) +        // State already examined with lower penalty. +        continue; + +      FormatDecision LastFormat = Node->State.NextToken->Decision; +      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) +        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); +      if (LastFormat == FD_Unformatted || LastFormat == FD_Break) +        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); +    } + +    if (Queue.empty()) { +      // We were unable to find a solution, do nothing. +      // FIXME: Add diagnostic? +      DEBUG(llvm::dbgs() << "Could not find a solution.\n"); +      return 0; +    } + +    // Reconstruct the solution. +    if (!DryRun) +      reconstructPath(InitialState, Queue.top().second); + +    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); +    DEBUG(llvm::dbgs() << "---\n"); + +    return Penalty; +  } + +  /// \brief Add the following state to the analysis queue \c Queue. +  /// +  /// Assume the current state is \p PreviousNode and has been reached with a +  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. +  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, +                           bool NewLine, unsigned *Count, QueueType *Queue) { +    if (NewLine && !Indenter->canBreak(PreviousNode->State)) +      return; +    if (!NewLine && Indenter->mustBreak(PreviousNode->State)) +      return; + +    StateNode *Node = new (Allocator.Allocate()) +        StateNode(PreviousNode->State, NewLine, PreviousNode); +    if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) +      return; + +    Penalty += Indenter->addTokenToState(Node->State, NewLine, true); + +    Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); +    ++(*Count); +  } + +  /// \brief Applies the best formatting by reconstructing the path in the +  /// solution space that leads to \c Best. +  void reconstructPath(LineState &State, StateNode *Best) { +    std::deque<StateNode *> Path; +    // We do not need a break before the initial token. +    while (Best->Previous) { +      Path.push_front(Best); +      Best = Best->Previous; +    } +    for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); +         I != E; ++I) { +      unsigned Penalty = 0; +      formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); +      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); + +      DEBUG({ +        printLineState((*I)->Previous->State); +        if ((*I)->NewLine) { +          llvm::dbgs() << "Penalty for placing " +                       << (*I)->Previous->State.NextToken->Tok.getName() << ": " +                       << Penalty << "\n"; +        } +      }); +    } +  } + +  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;  };  } // namespace @@ -315,7 +796,7 @@ unsigned  UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,                                 bool DryRun, int AdditionalIndent,                                 bool FixBadIndentation) { -  LineJoiner Joiner(Style); +  LineJoiner Joiner(Style, Keywords, Lines);    // Try to look up already computed penalty in DryRun-mode.    std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( @@ -326,151 +807,93 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,    assert(!Lines.empty());    unsigned Penalty = 0; -  std::vector<int> IndentForLevel; -  for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i) -    IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); +  LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, +                                   AdditionalIndent);    const AnnotatedLine *PreviousLine = nullptr; -  for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(), -                                                        E = Lines.end(); -       I != E; ++I) { -    const AnnotatedLine &TheLine = **I; -    const FormatToken *FirstTok = TheLine.First; -    int Offset = getIndentOffset(*FirstTok); - -    // Determine indent and try to merge multiple unwrapped lines. -    unsigned Indent; -    if (TheLine.InPPDirective) { -      Indent = TheLine.Level * Style.IndentWidth; -    } else { -      while (IndentForLevel.size() <= TheLine.Level) -        IndentForLevel.push_back(-1); -      IndentForLevel.resize(TheLine.Level + 1); -      Indent = getIndent(IndentForLevel, TheLine.Level); -    } -    unsigned LevelIndent = Indent; -    if (static_cast<int>(Indent) + Offset >= 0) -      Indent += Offset; - -    // Merge multiple lines if possible. -    unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E); -    if (MergedLines > 0 && Style.ColumnLimit == 0) { -      // Disallow line merging if there is a break at the start of one of the -      // input lines. -      for (unsigned i = 0; i < MergedLines; ++i) { -        if (I[i + 1]->First->NewlinesBefore > 0) -          MergedLines = 0; -      } -    } -    if (!DryRun) { -      for (unsigned i = 0; i < MergedLines; ++i) { -        join(*I[i], *I[i + 1]); -      } -    } -    I += MergedLines; - +  const AnnotatedLine *NextLine = nullptr; +  for (const AnnotatedLine *Line = +           Joiner.getNextMergedLine(DryRun, IndentTracker); +       Line; Line = NextLine) { +    const AnnotatedLine &TheLine = *Line; +    unsigned Indent = IndentTracker.getIndent();      bool FixIndentation = -        FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn); -    if (TheLine.First->is(tok::eof)) { -      if (PreviousLine && PreviousLine->Affected && !DryRun) { -        // Remove the file's trailing whitespace. -        unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u); -        Whitespaces->replaceWhitespace(*TheLine.First, Newlines, -                                       /*IndentLevel=*/0, /*Spaces=*/0, -                                       /*TargetColumn=*/0); -      } -    } else if (TheLine.Type != LT_Invalid && -               (TheLine.Affected || FixIndentation)) { -      if (FirstTok->WhitespaceRange.isValid()) { -        if (!DryRun) -          formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, +        FixBadIndentation && (Indent != TheLine.First->OriginalColumn); +    bool ShouldFormat = TheLine.Affected || FixIndentation; +    // We cannot format this line; if the reason is that the line had a +    // parsing error, remember that. +    if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat) +      *IncompleteFormat = true; + +    if (ShouldFormat && TheLine.Type != LT_Invalid) { +      if (!DryRun) +        formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent, +                         TheLine.InPPDirective); + +      NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); +      unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); +      bool FitsIntoOneLine = +          TheLine.Last->TotalLength + Indent <= ColumnLimit || +          TheLine.Type == LT_ImportStatement; + +      if (Style.ColumnLimit == 0) +        NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) +            .formatLine(TheLine, Indent, DryRun); +      else if (FitsIntoOneLine) +        Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) +                       .formatLine(TheLine, Indent, DryRun); +      else +        Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) +                       .formatLine(TheLine, Indent, DryRun); +    } else { +      // If no token in the current line is affected, we still need to format +      // affected children. +      if (TheLine.ChildrenAffected) +        format(TheLine.Children, DryRun); + +      // Adapt following lines on the current indent level to the same level +      // unless the current \c AnnotatedLine is not at the beginning of a line. +      bool StartsNewLine = +          TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst; +      if (StartsNewLine) +        IndentTracker.adjustToUnmodifiedLine(TheLine); +      if (!DryRun) { +        bool ReformatLeadingWhitespace = +            StartsNewLine && ((PreviousLine && PreviousLine->Affected) || +                              TheLine.LeadingEmptyLinesAffected); +        // Format the first token. +        if (ReformatLeadingWhitespace) +          formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, +                           TheLine.First->OriginalColumn,                             TheLine.InPPDirective); -      } else { -        Indent = LevelIndent = FirstTok->OriginalColumn; -      } - -      // If everything fits on a single line, just put it there. -      unsigned ColumnLimit = Style.ColumnLimit; -      if (I + 1 != E) { -        AnnotatedLine *NextLine = I[1]; -        if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline) -          ColumnLimit = getColumnLimit(TheLine.InPPDirective); -      } +        else +          Whitespaces->addUntouchableToken(*TheLine.First, +                                           TheLine.InPPDirective); -      if (TheLine.Last->TotalLength + Indent <= ColumnLimit || -          TheLine.Type == LT_ImportStatement) { -        LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun); -        while (State.NextToken) { -          formatChildren(State, /*Newline=*/false, /*DryRun=*/false, Penalty); -          Indenter->addTokenToState(State, /*Newline=*/false, DryRun); -        } -      } else if (Style.ColumnLimit == 0) { -        // FIXME: Implement nested blocks for ColumnLimit = 0. -        NoColumnLimitFormatter Formatter(Indenter); -        if (!DryRun) -          Formatter.format(Indent, &TheLine); -      } else { -        Penalty += format(TheLine, Indent, DryRun); -      } - -      if (!TheLine.InPPDirective) -        IndentForLevel[TheLine.Level] = LevelIndent; -    } else if (TheLine.ChildrenAffected) { -      format(TheLine.Children, DryRun); -    } else { -      // Format the first token if necessary, and notify the WhitespaceManager -      // about the unchanged whitespace. -      for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { -        if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { -          unsigned LevelIndent = Tok->OriginalColumn; -          if (!DryRun) { -            // Remove trailing whitespace of the previous line. -            if ((PreviousLine && PreviousLine->Affected) || -                TheLine.LeadingEmptyLinesAffected) { -              formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent, -                               TheLine.InPPDirective); -            } else { -              Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); -            } -          } - -          if (static_cast<int>(LevelIndent) - Offset >= 0) -            LevelIndent -= Offset; -          if (Tok->isNot(tok::comment) && !TheLine.InPPDirective) -            IndentForLevel[TheLine.Level] = LevelIndent; -        } else if (!DryRun) { +        // Notify the WhitespaceManager about the unchanged whitespace. +        for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)            Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); -        } -      } -    } -    if (!DryRun) { -      for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) { -        Tok->Finalized = true;        } +      NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);      } -    PreviousLine = *I; +    if (!DryRun) +      markFinalized(TheLine.First); +    PreviousLine = &TheLine;    }    PenaltyCache[CacheKey] = Penalty;    return Penalty;  } -unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line, -                                        unsigned FirstIndent, bool DryRun) { -  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); - -  // If the ObjC method declaration does not fit on a line, we should format -  // it with one arg per line. -  if (State.Line->Type == LT_ObjCMethodDecl) -    State.Stack.back().BreakBeforeParameter = true; - -  // Find best solution in solution space. -  return analyzeSolutionSpace(State, DryRun); -} -  void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,                                                const AnnotatedLine *PreviousLine,                                                unsigned IndentLevel,                                                unsigned Indent,                                                bool InPPDirective) { +  if (RootToken.is(tok::eof)) { +    unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); +    Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0, +                                   /*Spaces=*/0, /*TargetColumn=*/0); +    return; +  }    unsigned Newlines =        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);    // Remove empty lines before "}" where applicable. @@ -496,7 +919,8 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,      ++Newlines;    // Remove empty lines after access specifiers. -  if (PreviousLine && PreviousLine->First->isAccessSpecifier()) +  if (PreviousLine && PreviousLine->First->isAccessSpecifier() && +      (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))      Newlines = std::min(1u, Newlines);    Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent, @@ -504,202 +928,21 @@ void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,                                               !RootToken.HasUnescapedNewline);  } -/// \brief Get the indent of \p Level from \p IndentForLevel. -/// -/// \p IndentForLevel must contain the indent for the level \c l -/// at \p IndentForLevel[l], or a value < 0 if the indent for -/// that level is unknown. -unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel, -                                           unsigned Level) { -  if (IndentForLevel[Level] != -1) -    return IndentForLevel[Level]; -  if (Level == 0) -    return 0; -  return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; -} - -void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) { -  assert(!A.Last->Next); -  assert(!B.First->Previous); -  if (B.Affected) -    A.Affected = true; -  A.Last->Next = B.First; -  B.First->Previous = A.Last; -  B.First->CanBreakBefore = true; -  unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; -  for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { -    Tok->TotalLength += LengthA; -    A.Last = Tok; -  } -} - -unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState, -                                                      bool DryRun) { -  std::set<LineState *, CompareLineStatePointers> Seen; - -  // Increasing count of \c StateNode items we have created. This is used to -  // create a deterministic order independent of the container. -  unsigned Count = 0; -  QueueType Queue; - -  // Insert start element into queue. -  StateNode *Node = -      new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); -  Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); -  ++Count; - -  unsigned Penalty = 0; - -  // While not empty, take first element and follow edges. -  while (!Queue.empty()) { -    Penalty = Queue.top().first.first; -    StateNode *Node = Queue.top().second; -    if (!Node->State.NextToken) { -      DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); -      break; -    } -    Queue.pop(); - -    // Cut off the analysis of certain solutions if the analysis gets too -    // complex. See description of IgnoreStackForComparison. -    if (Count > 10000) -      Node->State.IgnoreStackForComparison = true; - -    if (!Seen.insert(&Node->State).second) -      // State already examined with lower penalty. -      continue; - -    FormatDecision LastFormat = Node->State.NextToken->Decision; -    if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) -      addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); -    if (LastFormat == FD_Unformatted || LastFormat == FD_Break) -      addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); -  } - -  if (Queue.empty()) { -    // We were unable to find a solution, do nothing. -    // FIXME: Add diagnostic? -    DEBUG(llvm::dbgs() << "Could not find a solution.\n"); -    return 0; -  } - -  // Reconstruct the solution. -  if (!DryRun) -    reconstructPath(InitialState, Queue.top().second); - -  DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); -  DEBUG(llvm::dbgs() << "---\n"); - -  return Penalty; -} - -#ifndef NDEBUG -static void printLineState(const LineState &State) { -  llvm::dbgs() << "State: "; -  for (const ParenState &P : State.Stack) { -    llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent -                 << " "; -  } -  llvm::dbgs() << State.NextToken->TokenText << "\n"; -} -#endif - -void UnwrappedLineFormatter::reconstructPath(LineState &State, -                                             StateNode *Current) { -  std::deque<StateNode *> Path; -  // We do not need a break before the initial token. -  while (Current->Previous) { -    Path.push_front(Current); -    Current = Current->Previous; -  } -  for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); -       I != E; ++I) { -    unsigned Penalty = 0; -    formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); -    Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - -    DEBUG({ -      printLineState((*I)->Previous->State); -      if ((*I)->NewLine) { -        llvm::dbgs() << "Penalty for placing " -                     << (*I)->Previous->State.NextToken->Tok.getName() << ": " -                     << Penalty << "\n"; -      } -    }); -  } -} - -void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty, -                                                 StateNode *PreviousNode, -                                                 bool NewLine, unsigned *Count, -                                                 QueueType *Queue) { -  if (NewLine && !Indenter->canBreak(PreviousNode->State)) -    return; -  if (!NewLine && Indenter->mustBreak(PreviousNode->State)) -    return; - -  StateNode *Node = new (Allocator.Allocate()) -      StateNode(PreviousNode->State, NewLine, PreviousNode); -  if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) -    return; - -  Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - -  Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); -  ++(*Count); -} - -bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine, -                                            bool DryRun, unsigned &Penalty) { -  FormatToken &Previous = *State.NextToken->Previous; -  const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); -  if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block || -      Previous.Children.size() == 0) -    // The previous token does not open a block. Nothing to do. We don't -    // assert so that we can simply call this function for all tokens. -    return true; - -  if (NewLine) { -    int AdditionalIndent = State.Stack.back().Indent - -                           Previous.Children[0]->Level * Style.IndentWidth; - -    Penalty += format(Previous.Children, DryRun, AdditionalIndent, -                      /*FixBadIndentation=*/true); -    return true; -  } - -  if (Previous.Children[0]->First->MustBreakBefore) -    return false; - -  // Cannot merge multiple statements into a single line. -  if (Previous.Children.size() > 1) -    return false; - -  // Cannot merge into one line if this line ends on a comment. -  if (Previous.is(tok::comment)) -    return false; - -  // We can't put the closing "}" on a line with a trailing comment. -  if (Previous.Children[0]->Last->isTrailingComment()) -    return false; - -  // If the child line exceeds the column limit, we wouldn't want to merge it. -  // We add +2 for the trailing " }". -  if (Style.ColumnLimit > 0 && -      Previous.Children[0]->Last->TotalLength + State.Column + 2 > -          Style.ColumnLimit) -    return false; - -  if (!DryRun) { -    Whitespaces->replaceWhitespace( -        *Previous.Children[0]->First, -        /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1, -        /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); -  } -  Penalty += format(*Previous.Children[0], State.Column + 1, DryRun); - -  State.Column += 1 + Previous.Children[0]->Last->TotalLength; -  return true; +unsigned +UnwrappedLineFormatter::getColumnLimit(bool InPPDirective, +                                       const AnnotatedLine *NextLine) const { +  // In preprocessor directives reserve two chars for trailing " \" if the +  // next line continues the preprocessor directive. +  bool ContinuesPPDirective = +      InPPDirective && +      // If there is no next line, this is likely a child line and the parent +      // continues the preprocessor directive. +      (!NextLine || +       (NextLine->InPPDirective && +        // If there is an unescaped newline between this line and the next, the +        // next line starts a new preprocessor directive. +        !NextLine->First->HasUnescapedNewline)); +  return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);  }  } // namespace format | 
