summaryrefslogtreecommitdiff
path: root/lib/Format/BreakableToken.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/BreakableToken.h')
-rw-r--r--lib/Format/BreakableToken.h317
1 files changed, 186 insertions, 131 deletions
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index e642a538e21c..8ef26ef464da 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h
@@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToken &Token);
struct FormatStyle;
-/// \brief Base class for strategies on how to break tokens.
+/// \brief Base class for tokens / ranges of tokens that can allow breaking
+/// within the tokens - for example, to avoid whitespace beyond the column
+/// limit, or to reflow text.
///
-/// This is organised around the concept of a \c Split, which is a whitespace
-/// range that signifies a position of the content of a token where a
-/// reformatting might be done. Operating with splits is divided into 3
-/// operations:
+/// Generally, a breakable token consists of logical lines, addressed by a line
+/// index. For example, in a sequence of line comments, each line comment is its
+/// own logical line; similarly, for a block comment, each line in the block
+/// comment is on its own logical line.
+///
+/// There are two methods to compute the layout of the token:
+/// - getRangeLength measures the number of columns needed for a range of text
+/// within a logical line, and
+/// - getContentStartColumn returns the start column at which we want the
+/// content of a logical line to start (potentially after introducing a line
+/// break).
+///
+/// The mechanism to adapt the layout of the breakable token is organised
+/// around the concept of a \c Split, which is a whitespace range that signifies
+/// a position of the content of a token where a reformatting might be done.
+///
+/// Operating with splits is divided into two operations:
/// - getSplit, for finding a split starting at a position,
-/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
-/// of the content after a split has been used for breaking, and
/// - insertBreak, for executing the split using a whitespace manager.
///
/// There is a pair of operations that are used to compress a long whitespace
-/// range with a single space if that will bring the line lenght under the
+/// range with a single space if that will bring the line length under the
/// column limit:
/// - getLineLengthAfterCompression, for calculating the size in columns of the
/// line after a whitespace range has been compressed, and
@@ -56,16 +69,23 @@ struct FormatStyle;
/// For tokens where the whitespace before each line needs to be also
/// reformatted, for example for tokens supporting reflow, there are analogous
/// operations that might be executed before the main line breaking occurs:
-/// - getSplitBefore, for finding a split such that the content preceding it
+/// - getReflowSplit, for finding a split such that the content preceding it
/// needs to be specially reflown,
-/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
-/// of the remainder of the content after the beginning of the content has
-/// been reformatted, and
-/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// - reflow, for executing the split using a whitespace manager,
+/// - introducesBreakBeforeToken, for checking if reformatting the beginning
+/// of the content introduces a line break before it,
+/// - adaptStartOfLine, for executing the reflow using a whitespace
/// manager.
///
-/// FIXME: The interface seems set in stone, so we might want to just pull the
-/// strategy into the class, instead of controlling it from the outside.
+/// For tokens that require the whitespace after the last line to be
+/// reformatted, for example in multiline jsdoc comments that require the
+/// trailing '*/' to be on a line of its own, there are analogous operations
+/// that might be executed after the last line has been reformatted:
+/// - getSplitAfterLastLine, for finding a split after the last line that needs
+/// to be reflown,
+/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
+/// whitespace manager.
+///
class BreakableToken {
public:
/// \brief Contains starting character index and length of split.
@@ -76,73 +96,122 @@ public:
/// \brief Returns the number of lines in this token in the original code.
virtual unsigned getLineCount() const = 0;
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
+ /// \brief Returns the number of columns required to format the text in the
+ /// byte range [\p Offset, \p Offset \c + \p Length).
+ ///
+ /// \p Offset is the byte offset from the start of the content of the line
+ /// at \p LineIndex.
+ ///
+ /// \p StartColumn is the column at which the text starts in the formatted
+ /// file, needed to compute tab stops correctly.
+ virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const = 0;
+
+ /// \brief Returns the number of columns required to format the text following
+ /// the byte \p Offset in the line \p LineIndex, including potentially
+ /// unbreakable sequences of tokens following after the end of the token.
+ ///
+ /// \p Offset is the byte offset from the start of the content of the line
+ /// at \p LineIndex.
+ ///
+ /// \p StartColumn is the column at which the text starts in the formatted
+ /// file, needed to compute tab stops correctly.
///
- /// Note that previous breaks are not taken into account. \p TailOffset is
- /// always specified from the start of the (original) line.
- /// \p Length can be set to StringRef::npos, which means "to the end of line".
- virtual unsigned
- getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const = 0;
+ /// For breakable tokens that never use extra space at the end of a line, this
+ /// is equivalent to getRangeLength with a Length of StringRef::npos.
+ virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const {
+ return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+ }
+
+ /// \brief Returns the column at which content in line \p LineIndex starts,
+ /// assuming no reflow.
+ ///
+ /// If \p Break is true, returns the column at which the line should start
+ /// after the line break.
+ /// If \p Break is false, returns the column at which the line itself will
+ /// start.
+ virtual unsigned getContentStartColumn(unsigned LineIndex,
+ bool Break) const = 0;
/// \brief Returns a range (offset, length) at which to break the line at
/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
- /// violate \p ColumnLimit.
+ /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
+ /// the token is formatted starting at \p ContentStartColumn in the
+ /// reformatted file.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit,
+ unsigned ColumnLimit, unsigned ContentStartColumn,
llvm::Regex &CommentPragmasRegex) const = 0;
/// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) = 0;
+ WhitespaceManager &Whitespaces) const = 0;
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
- /// \p Split has been compressed into a single space.
- unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
- Split Split) const;
+ /// \brief Returns the number of columns needed to format
+ /// \p RemainingTokenColumns, assuming that \p Split is within the range
+ /// measured by \p RemainingTokenColumns, and that the whitespace in \p Split
+ /// is reduced to a single space.
+ unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const;
/// \brief Replaces the whitespace range described by \p Split with a single
/// space.
virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
Split Split,
- WhitespaceManager &Whitespaces) = 0;
+ WhitespaceManager &Whitespaces) const = 0;
- /// \brief Returns a whitespace range (offset, length) of the content at
- /// \p LineIndex such that the content preceding this range needs to be
- /// reformatted before any breaks are made to this line.
+ /// \brief Returns whether the token supports reflowing text.
+ virtual bool supportsReflow() const { return false; }
+
+ /// \brief Returns a whitespace range (offset, length) of the content at \p
+ /// LineIndex such that the content of that line is reflown to the end of the
+ /// previous one.
///
- /// \p PreviousEndColumn is the end column of the previous line after
- /// formatting.
+ /// Returning (StringRef::npos, 0) indicates reflowing is not possible.
///
- /// A result having offset == StringRef::npos means that no piece of the line
- /// needs to be reformatted before any breaks are made.
- virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ /// The range will include any whitespace preceding the specified line's
+ /// content.
+ ///
+ /// If the split is not contained within one token, for example when reflowing
+ /// line comments, returns (0, <length>).
+ virtual Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const {
return Split(StringRef::npos, 0);
}
- /// \brief Returns the number of columns required to format the piece of line
- /// at \p LineIndex after the content preceding the whitespace range specified
- /// \p SplitBefore has been reformatted, but before any breaks are made to
- /// this line.
- virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const {
- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ /// \brief Reflows the current line into the end of the previous one.
+ virtual void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {}
+
+ /// \brief Returns whether there will be a line break at the start of the
+ /// token.
+ virtual bool introducesBreakBeforeToken() const {
+ return false;
}
/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
- /// Performs a reformatting of the content at \p LineIndex preceding the
- /// whitespace range \p SplitBefore.
- virtual void replaceWhitespaceBefore(unsigned LineIndex,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) {}
+ virtual void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const {}
+
+ /// \brief Returns a whitespace range (offset, length) of the content at
+ /// the last line that needs to be reformatted after the last line has been
+ /// reformatted.
+ ///
+ /// A result having offset == StringRef::npos means that no reformat is
+ /// necessary.
+ virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
+ return Split(StringRef::npos, 0);
+ }
+
+ /// \brief Replaces the whitespace range \p SplitAfterLastLine on the last
+ /// line by inserting a break there, once the last line has been formatted.
+ void replaceWhitespaceAfterLastLine(unsigned TailOffset,
+ Split SplitAfterLastLine,
+ WhitespaceManager &Whitespaces) const {
+ insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
+ Whitespaces);
+ }
/// \brief Updates the next token of \p State to the next token after this
/// one. This can be used when this token manages a set of underlying tokens
@@ -161,32 +230,7 @@ protected:
const FormatStyle &Style;
};
-/// \brief Base class for single line tokens that can be broken.
-///
-/// \c getSplit() needs to be implemented by child classes.
-class BreakableSingleLineToken : public BreakableToken {
-public:
- unsigned getLineCount() const override;
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
-
-protected:
- BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
- StringRef Prefix, StringRef Postfix,
- bool InPPDirective, encoding::Encoding Encoding,
- const FormatStyle &Style);
-
- // The column in which the token starts.
- unsigned StartColumn;
- // The prefix a line needs after a break in the token.
- StringRef Prefix;
- // The postfix a line needs before introducing a break.
- StringRef Postfix;
- // The token text excluding the prefix and postfix.
- StringRef Line;
-};
-
-class BreakableStringLiteral : public BreakableSingleLineToken {
+class BreakableStringLiteral : public BreakableToken {
public:
/// \brief Creates a breakable token for a single line string literal.
///
@@ -198,11 +242,32 @@ public:
const FormatStyle &Style);
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
+ WhitespaceManager &Whitespaces) const override;
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override {}
+ WhitespaceManager &Whitespaces) const override {}
+ unsigned getLineCount() const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
+
+protected:
+ // The column in which the token starts.
+ unsigned StartColumn;
+ // The prefix a line needs after a break in the token.
+ StringRef Prefix;
+ // The postfix a line needs before introducing a break.
+ StringRef Postfix;
+ // The token text excluding the prefix and postfix.
+ StringRef Line;
+ // Length of the sequence of tokens after this string literal that cannot
+ // contain line breaks.
+ unsigned UnbreakableTailLength;
};
class BreakableComment : public BreakableToken {
@@ -216,21 +281,15 @@ protected:
const FormatStyle &Style);
public:
+ bool supportsReflow() const override { return true; }
unsigned getLineCount() const override;
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ unsigned ReflowColumn,
llvm::Regex &CommentPragmasRegex) const override;
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
+ WhitespaceManager &Whitespaces) const override;
protected:
- virtual unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const = 0;
-
- // Returns a split that divides Text into a left and right parts, such that
- // the left part is suitable for reflowing after PreviousEndColumn.
- Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
- unsigned PreviousEndColumn, unsigned ColumnLimit) const;
-
// Returns the token containing the line at LineIndex.
const FormatToken &tokenAt(unsigned LineIndex) const;
@@ -289,21 +348,23 @@ public:
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ WhitespaceManager &Whitespaces) const override;
+ Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const override;
- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) override;
+ void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ bool introducesBreakBeforeToken() const override;
+ void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ Split getSplitAfterLastLine(unsigned TailOffset) const override;
+
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
@@ -318,14 +379,6 @@ private:
// considered part of the text).
void adjustWhitespace(unsigned LineIndex, int IndentDelta);
- // Computes the end column if the full Content from LineIndex gets reflown
- // after PreviousEndColumn.
- unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
- unsigned PreviousEndColumn) const;
-
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const override;
-
// The column at which the text of a broken line should start.
// Note that an optional decoration would go before that column.
// IndentAtLineBreak is a uniform position for all lines in a block comment,
@@ -348,6 +401,14 @@ private:
// If this block comment has decorations, this is the column of the start of
// the decorations.
unsigned DecorationColumn;
+
+ // If true, make sure that the opening '/**' and the closing '*/' are each on
+ // their own line. Styles like jsdoc require this for multiline comments.
+ bool DelimitersOnNewline;
+
+ // Length of the sequence of tokens after this block comment that cannot
+ // contain line breaks.
+ unsigned UnbreakableTailLength;
};
class BreakableLineCommentSection : public BreakableComment {
@@ -357,29 +418,23 @@ public:
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
- StringRef::size_type Length) const override;
+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+ StringRef::size_type Length,
+ unsigned StartColumn) const override;
+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- WhitespaceManager &Whitespaces) override;
- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit,
+ WhitespaceManager &Whitespaces) const override;
+ Split getReflowSplit(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
- unsigned TailOffset,
- unsigned PreviousEndColumn,
- unsigned ColumnLimit,
- Split SplitBefore) const override;
- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
- unsigned ColumnLimit, Split SplitBefore,
- WhitespaceManager &Whitespaces) override;
+ void reflow(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
+ void adaptStartOfLine(unsigned LineIndex,
+ WhitespaceManager &Whitespaces) const override;
void updateNextToken(LineState &State) const override;
bool mayReflow(unsigned LineIndex,
llvm::Regex &CommentPragmasRegex) const override;
private:
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const override;
-
// OriginalPrefix[i] contains the original prefix of line i, including
// trailing whitespace before the start of the content. The indentation
// preceding the prefix is not included.