diff options
Diffstat (limited to 'lib/Format/BreakableToken.h')
-rw-r--r-- | lib/Format/BreakableToken.h | 317 |
1 files changed, 186 insertions, 131 deletions
diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index e642a538e21c..8ef26ef464da 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToken &Token); struct FormatStyle; -/// \brief Base class for strategies on how to break tokens. +/// \brief Base class for tokens / ranges of tokens that can allow breaking +/// within the tokens - for example, to avoid whitespace beyond the column +/// limit, or to reflow text. /// -/// This is organised around the concept of a \c Split, which is a whitespace -/// range that signifies a position of the content of a token where a -/// reformatting might be done. Operating with splits is divided into 3 -/// operations: +/// Generally, a breakable token consists of logical lines, addressed by a line +/// index. For example, in a sequence of line comments, each line comment is its +/// own logical line; similarly, for a block comment, each line in the block +/// comment is on its own logical line. +/// +/// There are two methods to compute the layout of the token: +/// - getRangeLength measures the number of columns needed for a range of text +/// within a logical line, and +/// - getContentStartColumn returns the start column at which we want the +/// content of a logical line to start (potentially after introducing a line +/// break). +/// +/// The mechanism to adapt the layout of the breakable token is organised +/// around the concept of a \c Split, which is a whitespace range that signifies +/// a position of the content of a token where a reformatting might be done. +/// +/// Operating with splits is divided into two operations: /// - getSplit, for finding a split starting at a position, -/// - getLineLengthAfterSplit, for calculating the size in columns of the rest -/// of the content after a split has been used for breaking, and /// - insertBreak, for executing the split using a whitespace manager. /// /// There is a pair of operations that are used to compress a long whitespace -/// range with a single space if that will bring the line lenght under the +/// range with a single space if that will bring the line length under the /// column limit: /// - getLineLengthAfterCompression, for calculating the size in columns of the /// line after a whitespace range has been compressed, and @@ -56,16 +69,23 @@ struct FormatStyle; /// For tokens where the whitespace before each line needs to be also /// reformatted, for example for tokens supporting reflow, there are analogous /// operations that might be executed before the main line breaking occurs: -/// - getSplitBefore, for finding a split such that the content preceding it +/// - getReflowSplit, for finding a split such that the content preceding it /// needs to be specially reflown, -/// - getLineLengthAfterSplitBefore, for calculating the line length in columns -/// of the remainder of the content after the beginning of the content has -/// been reformatted, and -/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// - reflow, for executing the split using a whitespace manager, +/// - introducesBreakBefore, for checking if reformatting the beginning +/// of the content introduces a line break before it, +/// - adaptStartOfLine, for executing the reflow using a whitespace /// manager. /// -/// FIXME: The interface seems set in stone, so we might want to just pull the -/// strategy into the class, instead of controlling it from the outside. +/// For tokens that require the whitespace after the last line to be +/// reformatted, for example in multiline jsdoc comments that require the +/// trailing '*/' to be on a line of itself, there are analogous operations +/// that might be executed after the last line has been reformatted: +/// - getSplitAfterLastLine, for finding a split after the last line that needs +/// to be reflown, +/// - replaceWhitespaceAfterLastLine, for executing the reflow using a +/// whitespace manager. +/// class BreakableToken { public: /// \brief Contains starting character index and length of split. @@ -76,73 +96,122 @@ public: /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. + /// \brief Returns the number of columns required to format the text in the + /// byte range [\p Offset, \p Offset \c + \p Length). + /// + /// \p Offset is the byte offset from the start of the content of the line + /// at \p LineIndex. + /// + /// \p StartColumn is the column at which the text starts in the formatted + /// file, needed to compute tab stops correctly. + virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const = 0; + + /// \brief Returns the number of columns required to format the text following + /// the byte \p Offset in the line \p LineIndex, including potentially + /// unbreakable sequences of tokens following after the end of the token. + /// + /// \p Offset is the byte offset from the start of the content of the line + /// at \p LineIndex. + /// + /// \p StartColumn is the column at which the text starts in the formatted + /// file, needed to compute tab stops correctly. /// - /// Note that previous breaks are not taken into account. \p TailOffset is - /// always specified from the start of the (original) line. - /// \p Length can be set to StringRef::npos, which means "to the end of line". - virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const = 0; + /// For breakable tokens that never use extra space at the end of a line, this + /// is equivalent to getRangeLength with a Length of StringRef::npos. + virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const { + return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); + } + + /// \brief Returns the column at which content in line \p LineIndex starts, + /// assuming no reflow. + /// + /// If \p Break is true, returns the column at which the line should start + /// after the line break. + /// If \p Break is false, returns the column at which the line itself will + /// start. + virtual unsigned getContentStartColumn(unsigned LineIndex, + bool Break) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not - /// violate \p ColumnLimit. + /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in + /// the token is formatted starting at ContentStartColumn in the reformatted + /// file. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range - /// \p Split has been compressed into a single space. - unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns, - Split Split) const; + /// \brief Returns the number of columns needed to format + /// \p RemainingTokenColumns, assuming that Split is within the range measured + /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced + /// to a single space. + unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const; /// \brief Replaces the whitespace range described by \p Split with a single /// space. virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns a whitespace range (offset, length) of the content at - /// \p LineIndex such that the content preceding this range needs to be - /// reformatted before any breaks are made to this line. + /// \brief Returns whether the token supports reflowing text. + virtual bool supportsReflow() const { return false; } + + /// \brief Returns a whitespace range (offset, length) of the content at \p + /// LineIndex such that the content of that line is reflown to the end of the + /// previous one. /// - /// \p PreviousEndColumn is the end column of the previous line after - /// formatting. + /// Returning (StringRef::npos, 0) indicates reflowing is not possible. /// - /// A result having offset == StringRef::npos means that no piece of the line - /// needs to be reformatted before any breaks are made. - virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + /// The range will include any whitespace preceding the specified line's + /// content. + /// + /// If the split is not contained within one token, for example when reflowing + /// line comments, returns (0, <length>). + virtual Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { return Split(StringRef::npos, 0); } - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex after the content preceding the whitespace range specified - /// \p SplitBefore has been reformatted, but before any breaks are made to - /// this line. - virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const { - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + /// \brief Reflows the current line into the end of the previous one. + virtual void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} + + /// \brief Returns whether there will be a line break at the start of the + /// token. + virtual bool introducesBreakBeforeToken() const { + return false; } /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. - /// Performs a reformatting of the content at \p LineIndex preceding the - /// whitespace range \p SplitBefore. - virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) {} + virtual void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} + + /// \brief Returns a whitespace range (offset, length) of the content at + /// the last line that needs to be reformatted after the last line has been + /// reformatted. + /// + /// A result having offset == StringRef::npos means that no reformat is + /// necessary. + virtual Split getSplitAfterLastLine(unsigned TailOffset) const { + return Split(StringRef::npos, 0); + } + + /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line + /// after the last line has been formatted by performing a reformatting. + void replaceWhitespaceAfterLastLine(unsigned TailOffset, + Split SplitAfterLastLine, + WhitespaceManager &Whitespaces) const { + insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, + Whitespaces); + } /// \brief Updates the next token of \p State to the next token after this /// one. This can be used when this token manages a set of underlying tokens @@ -161,32 +230,7 @@ protected: const FormatStyle &Style; }; -/// \brief Base class for single line tokens that can be broken. -/// -/// \c getSplit() needs to be implemented by child classes. -class BreakableSingleLineToken : public BreakableToken { -public: - unsigned getLineCount() const override; - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; - -protected: - BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - - // The column in which the token starts. - unsigned StartColumn; - // The prefix a line needs after a break in the token. - StringRef Prefix; - // The postfix a line needs before introducing a break. - StringRef Postfix; - // The token text excluding the prefix and postfix. - StringRef Line; -}; - -class BreakableStringLiteral : public BreakableSingleLineToken { +class BreakableStringLiteral : public BreakableToken { public: /// \brief Creates a breakable token for a single line string literal. /// @@ -198,11 +242,32 @@ public: const FormatStyle &Style); Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override {} + WhitespaceManager &Whitespaces) const override {} + unsigned getLineCount() const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + +protected: + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; + // Length of the sequence of tokens after this string literal that cannot + // contain line breaks. + unsigned UnbreakableTailLength; }; class BreakableComment : public BreakableToken { @@ -216,21 +281,15 @@ protected: const FormatStyle &Style); public: + bool supportsReflow() const override { return true; } unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; protected: - virtual unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const = 0; - - // Returns a split that divides Text into a left and right parts, such that - // the left part is suitable for reflowing after PreviousEndColumn. - Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, - unsigned PreviousEndColumn, unsigned ColumnLimit) const; - // Returns the token containing the line at LineIndex. const FormatToken &tokenAt(unsigned LineIndex) const; @@ -289,21 +348,23 @@ public: bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + bool introducesBreakBeforeToken() const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + Split getSplitAfterLastLine(unsigned TailOffset) const override; + bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; @@ -318,14 +379,6 @@ private: // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Computes the end column if the full Content from LineIndex gets reflown - // after PreviousEndColumn. - unsigned getReflownColumn(StringRef Content, unsigned LineIndex, - unsigned PreviousEndColumn) const; - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. // IndentAtLineBreak is a uniform position for all lines in a block comment, @@ -348,6 +401,14 @@ private: // If this block comment has decorations, this is the column of the start of // the decorations. unsigned DecorationColumn; + + // If true, make sure that the opening '/**' and the closing '*/' ends on a + // line of itself. Styles like jsdoc require this for multiline comments. + bool DelimitersOnNewline; + + // Length of the sequence of tokens after this string literal that cannot + // contain line breaks. + unsigned UnbreakableTailLength; }; class BreakableLineCommentSection : public BreakableComment { @@ -357,29 +418,23 @@ public: bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; void updateNextToken(LineState &State) const override; bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; private: - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // OriginalPrefix[i] contains the original prefix of line i, including // trailing whitespace before the start of the content. The indentation // preceding the prefix is not included. |