1 files changed, 453 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
new file mode 100644
index 000000000000..fde89db864b1
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
@@ -0,0 +1,453 @@
+//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements an indenter that manages the indentation of
+/// continuations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
+#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+#include <map>
+#include <tuple>
+
+namespace clang {
+class SourceManager;
+
+namespace format {
+
+class AnnotatedLine;
+class BreakableToken;
+struct FormatToken;
+struct LineState;
+struct ParenState;
+struct RawStringFormatStyleManager;
+class WhitespaceManager;
+
+struct RawStringFormatStyleManager {
+  llvm::StringMap<FormatStyle> DelimiterStyle; // presumably keyed by delimiter
+  llvm::StringMap<FormatStyle> EnclosingFunctionStyle; // same, by function name
+
+  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
+
+  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
+
+  llvm::Optional<FormatStyle>
+  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
+};
+
+class ContinuationIndenter {
+public:
+  /// Constructs a \c ContinuationIndenter that formats according to \p Style.
+  /// The line and its first indent are supplied later to \c getInitialState.
+  ContinuationIndenter(const FormatStyle &Style,
+                       const AdditionalKeywords &Keywords,
+                       const SourceManager &SourceMgr,
+                       WhitespaceManager &Whitespaces,
+                       encoding::Encoding Encoding,
+                       bool BinPackInconclusiveFunctions);
+
+  /// Get the initial state, i.e. the state after placing \p Line's
+  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
+  /// the case of formatting inside raw string literals, \p FirstStartColumn is
+  /// the column at which the state of the parent formatter is.
+  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
+                            const AnnotatedLine *Line, bool DryRun);
+
+  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
+  // better home.
+  /// Returns \c true, if a line break after \p State is allowed.
+  bool canBreak(const LineState &State);
+
+  /// Returns \c true, if a line break after \p State is mandatory.
+  bool mustBreak(const LineState &State);
+
+  /// Appends the next token to \p State and updates information
+  /// necessary for indentation.
+  ///
+  /// Puts the token on the current line if \p Newline is \c false and adds a
+  /// line break and necessary indentation otherwise.
+  ///
+  /// If \p DryRun is \c false, also creates and stores the required
+  /// \c Replacement.
+  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+                           unsigned ExtraSpaces = 0);
+
+  /// Get the column limit for this line. This is the style's column
+  /// limit, potentially reduced for preprocessor definitions.
+  unsigned getColumnLimit(const LineState &State) const;
+
+private:
+  /// Mark the next token as consumed in \p State and modify its stacks
+  /// accordingly.
+  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
+
+  /// Update 'State' according to the next token's fake left parentheses.
+  void moveStatePastFakeLParens(LineState &State, bool Newline);
+  /// Update 'State' according to the next token's fake right parentheses.
+  void moveStatePastFakeRParens(LineState &State);
+
+  /// Update 'State' according to the next token being one of "(<{[".
+  void moveStatePastScopeOpener(LineState &State, bool Newline);
+  /// Update 'State' according to the next token being one of ")>}]".
+  void moveStatePastScopeCloser(LineState &State);
+  /// Update 'State' with the next token opening a nested block.
+  void moveStateToNewBlock(LineState &State);
+
+  /// Reformats a raw string literal.
+  ///
+  /// \returns An extra penalty induced by reformatting the token.
+  unsigned reformatRawStringLiteral(const FormatToken &Current,
+                                    LineState &State,
+                                    const FormatStyle &RawStringStyle,
+                                    bool DryRun);
+
+  /// If the current token is at the end of the current line, handle
+  /// the transition to the next line.
+  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+                           bool DryRun, bool AllowBreak);
+
+  /// If \p Current is a raw string that is configured to be reformatted,
+  /// return the style to be used.
+  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+                                                const LineState &State);
+
+  /// If the current token sticks out over the end of the line, break
+  /// it if possible.
+  ///
+  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
+  /// when tokens are broken or lines exceed the column limit, and exceeded
+  /// indicates whether the algorithm purposefully left lines exceeding the
+  /// column limit.
+  ///
+  /// The returned penalty will cover the cost of the additional line breaks
+  /// and column limit violation in all lines except for the last one. The
+  /// penalty for the column limit violation in the last line (and in single
+  /// line tokens) is handled in \c addNextStateToQueue.
+  ///
+  /// \p Strict indicates whether reflowing is allowed to leave characters
+  /// protruding the column limit; if true, lines will be split strictly within
+  /// the column limit where possible; if false, words are allowed to protrude
+  /// over the column limit as long as the penalty is less than the penalty
+  /// of a break.
+  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
+                                                 LineState &State,
+                                                 bool AllowBreak, bool DryRun,
+                                                 bool Strict);
+
+  /// Returns the \c BreakableToken starting at \p Current, or nullptr
+  /// if the current token cannot be broken.
+  std::unique_ptr<BreakableToken>
+  createBreakableToken(const FormatToken &Current, LineState &State,
+                       bool AllowBreak);
+
+  /// Appends the next token to \p State and updates information
+  /// necessary for indentation.
+  ///
+  /// Puts the token on the current line.
+  ///
+  /// If \p DryRun is \c false, also creates and stores the required
+  /// \c Replacement.
+  void addTokenOnCurrentLine(LineState &State, bool DryRun,
+                             unsigned ExtraSpaces);
+
+  /// Appends the next token to \p State and updates information
+  /// necessary for indentation.
+  ///
+  /// Adds a line break and necessary indentation.
+  ///
+  /// If \p DryRun is \c false, also creates and stores the required
+  /// \c Replacement.
+  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
+
+  /// Calculate the new column for a line wrap before the next token.
+  unsigned getNewLineColumn(const LineState &State);
+
+  /// Adds a multiline token to the \p State.
+  ///
+  /// \returns Extra penalty for the first line of the literal: last line is
+  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
+  /// matter, as we don't change them.
+  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
+
+  /// Returns \c true if the next token starts a multiline string
+  /// literal.
+  ///
+  /// This includes implicitly concatenated strings, strings that will be broken
+  /// by clang-format and string literals with escaped newlines.
+  bool nextIsMultilineString(const LineState &State);
+
+  FormatStyle Style;
+  const AdditionalKeywords &Keywords;
+  const SourceManager &SourceMgr;
+  WhitespaceManager &Whitespaces;
+  encoding::Encoding Encoding;
+  bool BinPackInconclusiveFunctions;
+  llvm::Regex CommentPragmasRegex;
+  const RawStringFormatStyleManager RawStringFormats;
+};
+
+struct ParenState {
+  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
+             bool AvoidBinPacking, bool NoLineBreak)
+      : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
+        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
+        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
+        NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
+        LastOperatorWrapped(true), ContainsLineBreak(false),
+        ContainsUnwrappedBuilder(false), AlignColons(true),
+        ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
+        NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
+
+  /// The token opening this parenthesis level, or nullptr if this level
+  /// is opened by a fake parenthesis.
+  ///
+  /// Not considered for memoization as it will always have the same value at
+  /// the same token.
+  const FormatToken *Tok;
+
+  /// The position to which a specific parenthesis level needs to be
+  /// indented.
+  unsigned Indent;
+
+  /// The position of the last space on each level.
+  ///
+  /// Used e.g. to break like:
+  /// functionCall(Parameter, otherCall(
+  ///                             OtherParameter));
+  unsigned LastSpace;
+
+  /// If a block relative to this parenthesis level gets wrapped, indent
+  /// it this much.
+  unsigned NestedBlockIndent;
+
+  /// The position of the first "<<" operator encountered on each level.
+  ///
+  /// Used to align "<<" operators. 0 if no such operator has been encountered
+  /// on a level.
+  unsigned FirstLessLess = 0;
+
+  /// The column of a \c ? in a conditional expression.
+  unsigned QuestionColumn = 0;
+
+  /// The position of the colon in an ObjC method declaration/call.
+  unsigned ColonPos = 0;
+
+  /// The start of the most recent function in a builder-type call.
+  unsigned StartOfFunctionCall = 0;
+
+  /// Contains the start of array subscript expressions, so that they
+  /// can be aligned.
+  unsigned StartOfArraySubscripts = 0;
+
+  /// If a nested name specifier was broken over multiple lines, this
+  /// contains the start column of the second line. Otherwise 0.
+  unsigned NestedNameSpecifierContinuation = 0;
+
+  /// If a call expression was broken over multiple lines, this
+  /// contains the start column of the second line. Otherwise 0.
+  unsigned CallContinuation = 0;
+
+  /// The column of the first variable name in a variable declaration.
+  ///
+  /// Used to align further variables if necessary.
+  unsigned VariablePos = 0;
+
+  /// Whether a newline needs to be inserted before the block's closing
+  /// brace.
+  ///
+  /// We only want to insert a newline before the closing brace if there also
+  /// was a newline after the beginning left brace.
+  bool BreakBeforeClosingBrace : 1;
+
+  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
+  /// lines, in this context.
+  bool AvoidBinPacking : 1;
+
+  /// Break after the next comma (or all the commas in this context if
+  /// \c AvoidBinPacking is \c true).
+  bool BreakBeforeParameter : 1;
+
+  /// Line breaking in this context would break a formatting rule.
+  bool NoLineBreak : 1;
+
+  /// Same as \c NoLineBreak, but is restricted until the end of the
+  /// operand (including the next ",").
+  bool NoLineBreakInOperand : 1;
+
+  /// True if the last binary operator on this level was wrapped to the
+  /// next line.
+  bool LastOperatorWrapped : 1;
+
+  /// \c true if this \c ParenState already contains a line-break.
+  ///
+  /// The first line break in a certain \c ParenState causes extra penalty so
+  /// that clang-format prefers similar breaks, i.e. breaks in the same
+  /// parenthesis.
+  bool ContainsLineBreak : 1;
+
+  /// \c true if this \c ParenState contains multiple segments of a
+  /// builder-type call on one line.
+  bool ContainsUnwrappedBuilder : 1;
+
+  /// \c true if the colons of the current ObjC method expression should
+  /// be aligned.
+  ///
+  /// Not considered for memoization as it will always have the same value at
+  /// the same token.
+  bool AlignColons : 1;
+
+  /// \c true if at least one selector name was found in the current
+  /// ObjC method expression.
+  ///
+  /// Not considered for memoization as it will always have the same value at
+  /// the same token.
+  bool ObjCSelectorNameFound : 1;
+
+  /// \c true if there are multiple nested blocks inside these parens.
+  ///
+  /// Not considered for memoization as it will always have the same value at
+  /// the same token.
+  bool HasMultipleNestedBlocks : 1;
+
+  /// The start of a nested block (e.g. lambda introducer in C++ or
+  /// "function" in JavaScript) is not wrapped to a new line.
+  bool NestedBlockInlined : 1;
+
+  /// \c true if the current \c ParenState represents an Objective-C
+  /// array literal.
+  bool IsInsideObjCArrayLiteral : 1;
+
+  bool operator<(const ParenState &Other) const {
+    if (Indent != Other.Indent)
+      return Indent < Other.Indent;
+    if (LastSpace != Other.LastSpace)
+      return LastSpace < Other.LastSpace;
+    if (NestedBlockIndent != Other.NestedBlockIndent)
+      return NestedBlockIndent < Other.NestedBlockIndent;
+    if (FirstLessLess != Other.FirstLessLess)
+      return FirstLessLess < Other.FirstLessLess;
+    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
+      return BreakBeforeClosingBrace; // for flags, the set value orders first
+    if (QuestionColumn != Other.QuestionColumn)
+      return QuestionColumn < Other.QuestionColumn;
+    if (AvoidBinPacking != Other.AvoidBinPacking)
+      return AvoidBinPacking;
+    if (BreakBeforeParameter != Other.BreakBeforeParameter)
+      return BreakBeforeParameter;
+    if (NoLineBreak != Other.NoLineBreak)
+      return NoLineBreak;
+    if (LastOperatorWrapped != Other.LastOperatorWrapped)
+      return LastOperatorWrapped;
+    if (ColonPos != Other.ColonPos)
+      return ColonPos < Other.ColonPos;
+    if (StartOfFunctionCall != Other.StartOfFunctionCall)
+      return StartOfFunctionCall < Other.StartOfFunctionCall;
+    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
+      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
+    if (CallContinuation != Other.CallContinuation)
+      return CallContinuation < Other.CallContinuation;
+    if (VariablePos != Other.VariablePos)
+      return VariablePos < Other.VariablePos;
+    if (ContainsLineBreak != Other.ContainsLineBreak)
+      return ContainsLineBreak;
+    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
+      return ContainsUnwrappedBuilder;
+    if (NestedBlockInlined != Other.NestedBlockInlined)
+      return NestedBlockInlined;
+    return false; // equal on every compared member; some are not compared
+  }
+};
+
+/// The current state when indenting an unwrapped line.
+///
+/// As the indenting tries different combinations this is copied by value.
+struct LineState {
+  /// The number of used columns in the current line.
+  unsigned Column;
+
+  /// The token that needs to be formatted next.
+  FormatToken *NextToken;
+
+  /// \c true if this line contains a continued for-loop section.
+  bool LineContainsContinuedForLoopSection;
+
+  /// \c true if \p NextToken should not continue this line.
+  bool NoContinuation;
+
+  /// The \c NestingLevel at the start of this line.
+  unsigned StartOfLineLevel;
+
+  /// The lowest \c NestingLevel on the current line.
+  unsigned LowestLevelOnLine;
+
+  /// The start column of the string literal, if we're in a string
+  /// literal sequence, 0 otherwise.
+  unsigned StartOfStringLiteral;
+
+  /// A stack keeping track of properties applying to parenthesis
+  /// levels.
+  std::vector<ParenState> Stack;
+
+  /// Ignore the stack of \c ParenStates for state comparison.
+  ///
+  /// In long and deeply nested unwrapped lines, the current algorithm can
+  /// be insufficient for finding the best formatting with a reasonable amount
+  /// of time and memory. Setting this flag will effectively lead to the
+  /// algorithm not analyzing some combinations. However, these combinations
+  /// rarely contain the optimal solution: In short, accepting a higher
+  /// penalty early would need to lead to different values in the \c
+  /// ParenState stack (in an otherwise identical state) and these different
+  /// values would need to lead to a significant amount of avoided penalty
+  /// later.
+  ///
+  /// FIXME: Come up with a better algorithm instead.
+  bool IgnoreStackForComparison;
+
+  /// The indent of the first token.
+  unsigned FirstIndent;
+
+  /// The line that is being formatted.
+  ///
+  /// Does not need to be considered for memoization because it doesn't change.
+  const AnnotatedLine *Line;
+
+  /// Comparison operator to be able to use \c LineState in \c map.
+  bool operator<(const LineState &Other) const {
+    if (NextToken != Other.NextToken)
+      return NextToken < Other.NextToken;
+    if (Column != Other.Column)
+      return Column < Other.Column;
+    if (LineContainsContinuedForLoopSection !=
+        Other.LineContainsContinuedForLoopSection)
+      return LineContainsContinuedForLoopSection;
+    if (NoContinuation != Other.NoContinuation)
+      return NoContinuation;
+    if (StartOfLineLevel != Other.StartOfLineLevel)
+      return StartOfLineLevel < Other.StartOfLineLevel;
+    if (LowestLevelOnLine != Other.LowestLevelOnLine)
+      return LowestLevelOnLine < Other.LowestLevelOnLine;
+    if (StartOfStringLiteral != Other.StartOfStringLiteral)
+      return StartOfStringLiteral < Other.StartOfStringLiteral;
+    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
+      return false; // either side opted out: the stacks are not compared
+    return Stack < Other.Stack;
+  }
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif