diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:18:08 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:18:08 +0000 |
commit | bab175ec4b075c8076ba14c762900392533f6ee4 (patch) | |
tree | 01f4f29419a2cb10abe13c1e63cd2a66068b0137 /lib/Format | |
parent | 8b7a8012d223fac5d17d16a66bb39168a9a1dfc0 (diff) |
Notes
Diffstat (limited to 'lib/Format')
-rw-r--r-- | lib/Format/BreakableToken.cpp | 16 | ||||
-rw-r--r-- | lib/Format/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Format/Comments.cpp | 36 | ||||
-rw-r--r-- | lib/Format/Comments.h | 33 | ||||
-rw-r--r-- | lib/Format/ContinuationIndenter.cpp | 14 | ||||
-rw-r--r-- | lib/Format/Encoding.h | 29 | ||||
-rw-r--r-- | lib/Format/Format.cpp | 498 | ||||
-rw-r--r-- | lib/Format/FormatToken.cpp | 15 | ||||
-rw-r--r-- | lib/Format/FormatToken.h | 19 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.cpp | 70 | ||||
-rw-r--r-- | lib/Format/FormatTokenLexer.h | 21 | ||||
-rw-r--r-- | lib/Format/SortJavaScriptImports.cpp | 62 | ||||
-rw-r--r-- | lib/Format/TokenAnalyzer.cpp | 16 | ||||
-rw-r--r-- | lib/Format/TokenAnalyzer.h | 7 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.cpp | 197 | ||||
-rw-r--r-- | lib/Format/TokenAnnotator.h | 1 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.cpp | 3 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineFormatter.h | 2 | ||||
-rw-r--r-- | lib/Format/UnwrappedLineParser.cpp | 46 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.cpp | 15 | ||||
-rw-r--r-- | lib/Format/WhitespaceManager.h | 3 |
21 files changed, 776 insertions, 328 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 36a8c4d8da6d5..6363f895f95b9 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "BreakableToken.h" +#include "Comments.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" @@ -182,21 +183,6 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex, Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); } -static StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = {"///", "//", "//!"}; - StringRef LongestPrefix; - for (StringRef KnownPrefix : KnownPrefixes) { - if (Comment.startswith(KnownPrefix)) { - size_t PrefixLength = KnownPrefix.size(); - while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') - ++PrefixLength; - if (PrefixLength > LongestPrefix.size()) - LongestPrefix = Comment.substr(0, PrefixLength); - } - } - return LongestPrefix; -} - BreakableLineComment::BreakableLineComment( const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index cb46b9f255d2b..c977c2d3c5fa1 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangFormat AffectedRangeManager.cpp BreakableToken.cpp + Comments.cpp ContinuationIndenter.cpp Format.cpp FormatToken.cpp diff --git a/lib/Format/Comments.cpp b/lib/Format/Comments.cpp new file mode 100644 index 0000000000000..1b27f5b30a603 --- /dev/null +++ b/lib/Format/Comments.cpp @@ -0,0 +1,36 @@ +//===--- Comments.cpp - Comment Manipulation -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University 
of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Implements comment manipulation. +/// +//===----------------------------------------------------------------------===// + +#include "Comments.h" + +namespace clang { +namespace format { + +StringRef getLineCommentIndentPrefix(StringRef Comment) { + static const char *const KnownPrefixes[] = {"///", "//", "//!"}; + StringRef LongestPrefix; + for (StringRef KnownPrefix : KnownPrefixes) { + if (Comment.startswith(KnownPrefix)) { + size_t PrefixLength = KnownPrefix.size(); + while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') + ++PrefixLength; + if (PrefixLength > LongestPrefix.size()) + LongestPrefix = Comment.substr(0, PrefixLength); + } + } + return LongestPrefix; +} + +} // namespace format +} // namespace clang diff --git a/lib/Format/Comments.h b/lib/Format/Comments.h new file mode 100644 index 0000000000000..59f0596361a5c --- /dev/null +++ b/lib/Format/Comments.h @@ -0,0 +1,33 @@ +//===--- Comments.h - Comment manipulation -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Declares comment manipulation functionality. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H +#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace format { + +/// \brief Returns the comment prefix of the line comment \p Comment. +/// +/// The comment prefix consists of a leading known prefix, like "//" or "///", +/// together with the following whitespace. 
+StringRef getLineCommentIndentPrefix(StringRef Comment); + +} // namespace format +} // namespace clang + +#endif diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 322969e4bb71e..bf075ab6d53ea 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -19,7 +19,6 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "llvm/Support/Debug.h" -#include <string> #define DEBUG_TYPE "format-formatter" @@ -178,6 +177,9 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) return true; + if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && + State.Line->startsWith(TT_ObjCMethodSpecifier)) + return true; if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound && State.Stack.back().BreakBeforeParameter) return true; @@ -458,7 +460,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, Penalty += State.NextToken->SplitPenalty; // Breaking before the first "<<" is generally not desirable if the LHS is - // short. Also always add the penalty if the LHS is split over mutliple lines + // short. Also always add the penalty if the LHS is split over multiple lines // to avoid unnecessary line breaks that just work around this penalty. 
if (NextNonComment->is(tok::lessless) && State.Stack.back().FirstLessLess == 0 && @@ -521,7 +523,8 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, Style.ContinuationIndentWidth; } - if ((Previous.isOneOf(tok::comma, tok::semi) && + if ((PreviousNonComment && + PreviousNonComment->isOneOf(tok::comma, tok::semi) && !State.Stack.back().AvoidBinPacking) || Previous.is(TT_BinaryOperator)) State.Stack.back().BreakBeforeParameter = false; @@ -557,6 +560,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // and we need to avoid bin packing there. bool NestedBlockSpecialCase = Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_ObjC && Current.is(tok::r_brace) && State.Stack.size() > 1 && State.Stack[State.Stack.size() - 2].NestedBlockInlined; if (!NestedBlockSpecialCase) @@ -672,6 +676,8 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack.back().ColonPos - NextNonComment->ColumnWidth; return State.Stack.back().Indent; } + if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr)) + return State.Stack.back().ColonPos; if (NextNonComment->is(TT_ArraySubscriptLSquare)) { if (State.Stack.back().StartOfArraySubscripts != 0) return State.Stack.back().StartOfArraySubscripts; @@ -861,7 +867,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, // Exclude relational operators, as there, it is always more desirable to // have the LHS 'left' of the RHS. 
if (Previous && Previous->getPrecedence() != prec::Assignment && - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && + Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, tok::comma) && Previous->getPrecedence() != prec::Relational) { bool BreakBeforeOperator = Previous->is(tok::lessless) || diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index 148f7fd0e91ba..3339597b4edd7 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -33,32 +33,13 @@ enum Encoding { /// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8, /// it is considered UTF8, otherwise we treat it as some 8-bit encoding. inline Encoding detectEncoding(StringRef Text) { - const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin()); - const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end()); - if (::isLegalUTF8String(&Ptr, BufEnd)) + const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin()); + const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end()); + if (llvm::isLegalUTF8String(&Ptr, BufEnd)) return Encoding_UTF8; return Encoding_Unknown; } -inline unsigned getCodePointCountUTF8(StringRef Text) { - unsigned CodePoints = 0; - for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) { - ++CodePoints; - } - return CodePoints; -} - -/// \brief Gets the number of code points in the Text using the specified -/// Encoding. -inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) { - switch (Encoding) { - case Encoding_UTF8: - return getCodePointCountUTF8(Text); - default: - return Text.size(); - } -} - /// \brief Returns the number of columns required to display the \p Text on a /// generic Unicode-capable terminal. Text is assumed to use the specified /// \p Encoding. 
@@ -97,7 +78,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { switch (Encoding) { case Encoding_UTF8: - return getNumBytesForUTF8(FirstChar); + return llvm::getNumBytesForUTF8(FirstChar); default: return 1; } @@ -136,7 +117,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) { ++I; return I; } - return 1 + getNumBytesForUTF8(Text[1]); + return 1 + llvm::getNumBytesForUTF8(Text[1]); } } diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 32d6bb855ad6e..70b90d6fa14e0 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <memory> -#include <queue> #include <string> #define DEBUG_TYPE "format-formatter" @@ -53,6 +52,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); IO.enumCase(Value, "Java", FormatStyle::LK_Java); IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); + IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); } @@ -339,6 +339,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); + IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); @@ -420,7 +421,7 @@ std::error_code make_error_code(ParseError e) { return std::error_code(static_cast<int>(e), getParseCategory()); } -const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { +const char *ParseErrorCategory::name() const noexcept { return 
"clang-format.parse_error"; } @@ -553,6 +554,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesInContainerLiterals = true; LLVMStyle.SpacesInCStyleCastParentheses = false; LLVMStyle.SpaceAfterCStyleCast = false; + LLVMStyle.SpaceAfterTemplateKeyword = true; LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpacesInAngles = false; @@ -609,10 +611,11 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { } else if (Language == FormatStyle::LK_JavaScript) { GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; GoogleStyle.AlignOperands = false; - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; - GoogleStyle.CommentPragmas = "@(export|requirecss|return|see|visibility) "; + GoogleStyle.CommentPragmas = + "(taze:|@(export|requirecss|return|returns|see|visibility)) "; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; @@ -621,6 +624,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { } else if (Language == FormatStyle::LK_Proto) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; GoogleStyle.SpacesInContainerLiterals = false; + } else if (Language == FormatStyle::LK_ObjC) { + GoogleStyle.ColumnLimit = 100; } return GoogleStyle; @@ -650,10 +655,12 @@ FormatStyle getMozillaStyle() { MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; MozillaStyle.AlwaysBreakAfterReturnType = - FormatStyle::RTBS_TopLevelDefinitions; + FormatStyle::RTBS_TopLevel; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; MozillaStyle.AlwaysBreakTemplateDeclarations = 
true; + MozillaStyle.BinPackParameters = false; + MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializersBeforeComma = true; MozillaStyle.ConstructorInitializerIndentWidth = 2; @@ -664,6 +671,7 @@ FormatStyle getMozillaStyle() { MozillaStyle.ObjCSpaceBeforeProtocolList = false; MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; + MozillaStyle.SpaceAfterTemplateKeyword = false; return MozillaStyle; } @@ -683,7 +691,6 @@ FormatStyle getWebKitStyle() { Style.ObjCBlockIndentWidth = 4; Style.ObjCSpaceAfterProperty = true; Style.PointerAlignment = FormatStyle::PAS_Left; - Style.Standard = FormatStyle::LS_Cpp03; return Style; } @@ -791,46 +798,25 @@ std::string configurationAsText(const FormatStyle &Style) { namespace { -class Formatter : public TokenAnalyzer { +class JavaScriptRequoter : public TokenAnalyzer { public: - Formatter(const Environment &Env, const FormatStyle &Style, - bool *IncompleteFormat) - : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) override { - deriveLocalStyle(AnnotatedLines); + FormatTokenLexer &Tokens) override { AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); - - if (Style.Language == FormatStyle::LK_JavaScript && - Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) - requoteJSStringLiteral(AnnotatedLines, Result); - - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.calculateFormattingInformation(*AnnotatedLines[i]); - } - - Annotator.setCommentLineLevels(AnnotatedLines); - - WhitespaceManager Whitespaces( - Env.getSourceManager(), Style, - 
inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); - ContinuationIndenter Indenter(Style, Tokens.getKeywords(), - Env.getSourceManager(), Whitespaces, Encoding, - BinPackInconclusiveFunctions); - UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - IncompleteFormat) - .format(AnnotatedLines); - return Whitespaces.generateReplacements(); + tooling::Replacements Result; + requoteJSStringLiteral(AnnotatedLines, Result); + return Result; } private: - // If the last token is a double/single-quoted string literal, generates a - // replacement with a single/double quoted string literal, re-escaping the - // contents in the process. + // Replaces double/single-quoted string literal as appropriate, re-escaping + // the contents in the process. void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result) { for (AnnotatedLine *Line : Lines) { @@ -842,8 +828,7 @@ private: StringRef Input = FormatTok->TokenText; if (FormatTok->Finalized || !FormatTok->isStringLiteral() || // NB: testing for not starting with a double quote to avoid - // breaking - // `template strings`. + // breaking `template strings`. (Style.JavaScriptQuotes == FormatStyle::JSQS_Single && !Input.startswith("\"")) || (Style.JavaScriptQuotes == FormatStyle::JSQS_Double && @@ -855,15 +840,20 @@ private: SourceLocation Start = FormatTok->Tok.getLocation(); auto Replace = [&](SourceLocation Start, unsigned Length, StringRef ReplacementText) { - Result.insert(tooling::Replacement(Env.getSourceManager(), Start, - Length, ReplacementText)); + auto Err = Result.add(tooling::Replacement( + Env.getSourceManager(), Start, Length, ReplacementText)); + // FIXME: handle error. For now, print error message and skip the + // replacement for release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } }; Replace(Start, 1, IsSingle ? 
"'" : "\""); Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1, IsSingle ? "'" : "\""); // Escape internal quotes. - size_t ColumnWidth = FormatTok->TokenText.size(); bool Escaped = false; for (size_t i = 1; i < Input.size() - 1; i++) { switch (Input[i]) { @@ -873,7 +863,6 @@ private: (!IsSingle && Input[i + 1] == '\''))) { // Remove this \, it's escaping a " or ' that no longer needs // escaping - ColumnWidth--; Replace(Start.getLocWithOffset(i), 1, ""); continue; } @@ -884,7 +873,6 @@ private: if (!Escaped && IsSingle == (Input[i] == '\'')) { // Escape the quote. Replace(Start.getLocWithOffset(i), 0, "\\"); - ColumnWidth++; } Escaped = false; break; @@ -893,16 +881,46 @@ private: break; } } - - // For formatting, count the number of non-escaped single quotes in them - // and adjust ColumnWidth to take the added escapes into account. - // FIXME(martinprobst): this might conflict with code breaking a long - // string literal (which clang-format doesn't do, yet). For that to - // work, this code would have to modify TokenText directly. 
- FormatTok->ColumnWidth = ColumnWidth; } } } +}; + +class Formatter : public TokenAnalyzer { +public: + Formatter(const Environment &Env, const FormatStyle &Style, + bool *IncompleteFormat) + : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + + tooling::Replacements + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + tooling::Replacements Result; + deriveLocalStyle(AnnotatedLines); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), + AnnotatedLines.end()); + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + Annotator.calculateFormattingInformation(*AnnotatedLines[i]); + } + Annotator.setCommentLineLevels(AnnotatedLines); + + WhitespaceManager Whitespaces( + Env.getSourceManager(), Style, + inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); + ContinuationIndenter Indenter(Style, Tokens.getKeywords(), + Env.getSourceManager(), Whitespaces, Encoding, + BinPackInconclusiveFunctions); + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), + IncompleteFormat) + .format(AnnotatedLines); + for (const auto &R : Whitespaces.generateReplacements()) + if (Result.add(R)) + return Result; + return Result; + } + +private: static bool inputUsesCRLF(StringRef Text) { return Text.count('\r') * 2 > Text.count('\n'); @@ -991,7 +1009,7 @@ public: tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) override { + FormatTokenLexer &Tokens) override { // FIXME: in the current implementation the granularity of affected range // is an annotated line. However, this is not sufficient. 
Furthermore, // redundant code introduced by replacements does not necessarily @@ -1008,8 +1026,11 @@ public: if (Line->Affected) { cleanupRight(Line->First, tok::comma, tok::comma); cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma); + cleanupRight(Line->First, tok::l_paren, tok::comma); + cleanupLeft(Line->First, tok::comma, tok::r_paren); cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace); cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace); + cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal); } } @@ -1027,11 +1048,12 @@ private: // Iterate through all lines and remove any empty (nested) namespaces. void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { + std::set<unsigned> DeletedLines; for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { auto &Line = *AnnotatedLines[i]; if (Line.startsWith(tok::kw_namespace) || Line.startsWith(tok::kw_inline, tok::kw_namespace)) { - checkEmptyNamespace(AnnotatedLines, i, i); + checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines); } } @@ -1049,7 +1071,8 @@ private: // sets \p NewLine to the last line checked. // Returns true if the current namespace is empty. 
bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - unsigned CurrentLine, unsigned &NewLine) { + unsigned CurrentLine, unsigned &NewLine, + std::set<unsigned> &DeletedLines) { unsigned InitLine = CurrentLine, End = AnnotatedLines.size(); if (Style.BraceWrapping.AfterNamespace) { // If the left brace is in a new line, we should consume it first so that @@ -1069,7 +1092,8 @@ private: if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) || AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline, tok::kw_namespace)) { - if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine)) + if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine, + DeletedLines)) return false; CurrentLine = NewLine; continue; @@ -1121,6 +1145,8 @@ private: break; if (Left->is(LK) && Right->is(RK)) { deleteToken(DeleteLeft ? Left : Right); + for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next) + deleteToken(Tok); // If the right token is deleted, we should keep the left token // unchanged and pair it with the new right token. if (!DeleteLeft) @@ -1164,7 +1190,14 @@ private: } auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(), Tokens[End]->Tok.getEndLoc()); - Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, "")); + auto Err = + Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, "")); + // FIXME: better error handling. for now just print error message and skip + // for the release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false && "Fixes must not conflict!"); + } Idx = End + 1; } @@ -1186,8 +1219,6 @@ private: // Tokens to be deleted. std::set<FormatToken *, FormatTokenLess> DeletedTokens; - // The line numbers of lines to be deleted. 
- std::set<unsigned> DeletedLines; }; struct IncludeDirective { @@ -1210,15 +1241,50 @@ static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start, return false; } -// Sorts a block of includes given by 'Includes' alphabetically adding the -// necessary replacement to 'Replaces'. 'Includes' must be in strict source -// order. +// Returns a pair (Index, OffsetToEOL) describing the position of the cursor +// before sorting/deduplicating. Index is the index of the include under the +// cursor in the original set of includes. If this include has duplicates, it is +// the index of the first of the duplicates as the others are going to be +// removed. OffsetToEOL describes the cursor's position relative to the end of +// its current line. +// If `Cursor` is not on any #include, `Index` will be UINT_MAX. +static std::pair<unsigned, unsigned> +FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes, + const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) { + unsigned CursorIndex = UINT_MAX; + unsigned OffsetToEOL = 0; + for (int i = 0, e = Includes.size(); i != e; ++i) { + unsigned Start = Includes[Indices[i]].Offset; + unsigned End = Start + Includes[Indices[i]].Text.size(); + if (!(Cursor >= Start && Cursor < End)) + continue; + CursorIndex = Indices[i]; + OffsetToEOL = End - Cursor; + // Put the cursor on the only remaining #include among the duplicate + // #includes. + while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text) + CursorIndex = i; + break; + } + return std::make_pair(CursorIndex, OffsetToEOL); +} + +// Sorts and deduplicates a block of includes given by 'Includes' alphabetically +// adding the necessary replacement to 'Replaces'. 'Includes' must be in strict +// source order. +// #include directives with the same text will be deduplicated, and only the +// first #include in the duplicate #includes remains. 
If the `Cursor` is +// provided and put on a deleted #include, it will be moved to the remaining +// #include in the duplicate #includes. static void sortCppIncludes(const FormatStyle &Style, - const SmallVectorImpl<IncludeDirective> &Includes, - ArrayRef<tooling::Range> Ranges, StringRef FileName, - tooling::Replacements &Replaces, unsigned *Cursor) { - if (!affectsRange(Ranges, Includes.front().Offset, - Includes.back().Offset + Includes.back().Text.size())) + const SmallVectorImpl<IncludeDirective> &Includes, + ArrayRef<tooling::Range> Ranges, StringRef FileName, + tooling::Replacements &Replaces, unsigned *Cursor) { + unsigned IncludesBeginOffset = Includes.front().Offset; + unsigned IncludesEndOffset = + Includes.back().Offset + Includes.back().Text.size(); + unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset; + if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset)) return; SmallVector<unsigned, 16> Indices; for (unsigned i = 0, e = Includes.size(); i != e; ++i) @@ -1228,37 +1294,45 @@ static void sortCppIncludes(const FormatStyle &Style, return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) < std::tie(Includes[RHSI].Category, Includes[RHSI].Filename); }); + // The index of the include on which the cursor will be put after + // sorting/deduplicating. + unsigned CursorIndex; + // The offset from cursor to the end of line. + unsigned CursorToEOLOffset; + if (Cursor) + std::tie(CursorIndex, CursorToEOLOffset) = + FindCursorIndex(Includes, Indices, *Cursor); + + // Deduplicate #includes. + Indices.erase(std::unique(Indices.begin(), Indices.end(), + [&](unsigned LHSI, unsigned RHSI) { + return Includes[LHSI].Text == Includes[RHSI].Text; + }), + Indices.end()); // If the #includes are out of order, we generate a single replacement fixing // the entire block. Otherwise, no replacement is generated. 
- if (std::is_sorted(Indices.begin(), Indices.end())) + if (Indices.size() == Includes.size() && + std::is_sorted(Indices.begin(), Indices.end())) return; std::string result; - bool CursorMoved = false; for (unsigned Index : Indices) { if (!result.empty()) result += "\n"; result += Includes[Index].Text; - - if (Cursor && !CursorMoved) { - unsigned Start = Includes[Index].Offset; - unsigned End = Start + Includes[Index].Text.size(); - if (*Cursor >= Start && *Cursor < End) { - *Cursor = Includes.front().Offset + result.size() + *Cursor - End; - CursorMoved = true; - } - } + if (Cursor && CursorIndex == Index) + *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset; } - // Sorting #includes shouldn't change their total number of characters. - // This would otherwise mess up 'Ranges'. - assert(result.size() == - Includes.back().Offset + Includes.back().Text.size() - - Includes.front().Offset); - - Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset, - result.size(), result)); + auto Err = Replaces.add(tooling::Replacement( + FileName, Includes.front().Offset, IncludesBlockSize, result)); + // FIXME: better error handling. For now, just skip the replacement for the + // release version. 
+ if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } } namespace { @@ -1403,14 +1477,13 @@ processReplacements(T ProcessFunc, StringRef Code, auto NewCode = applyAllReplacements(Code, Replaces); if (!NewCode) return NewCode.takeError(); - std::vector<tooling::Range> ChangedRanges = - tooling::calculateChangedRanges(Replaces); + std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges(); StringRef FileName = Replaces.begin()->getFilePath(); tooling::Replacements FormatReplaces = ProcessFunc(Style, *NewCode, ChangedRanges, FileName); - return mergeReplacements(Replaces, FormatReplaces); + return Replaces.merge(FormatReplaces); } llvm::Expected<tooling::Replacements> @@ -1441,14 +1514,31 @@ formatReplacements(StringRef Code, const tooling::Replacements &Replaces, namespace { inline bool isHeaderInsertion(const tooling::Replacement &Replace) { - return Replace.getOffset() == UINT_MAX && + return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 && llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); } -void skipComments(Lexer &Lex, Token &Tok) { - while (Tok.is(tok::comment)) - if (Lex.LexFromRawLexer(Tok)) - return; +inline bool isHeaderDeletion(const tooling::Replacement &Replace) { + return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; +} + +// Returns the offset after skipping a sequence of tokens, matched by \p +// GetOffsetAfterSequence, from the start of the code. +// \p GetOffsetAfterSequence should be a function that matches a sequence of +// tokens and returns an offset after the sequence. 
+unsigned getOffsetAfterTokenSequence( + StringRef FileName, StringRef Code, const FormatStyle &Style, + std::function<unsigned(const SourceManager &, Lexer &, Token &)> + GetOffsetAfterSequense) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + const SourceManager &SourceMgr = Env->getSourceManager(); + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, + getFormattingLangOpts(Style)); + Token Tok; + // Get the first token. + Lex.LexFromRawLexer(Tok); + return GetOffsetAfterSequense(SourceMgr, Lex, Tok); } // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, @@ -1464,32 +1554,90 @@ bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { return Matched; } +void skipComments(Lexer &Lex, Token &Tok) { + while (Tok.is(tok::comment)) + if (Lex.LexFromRawLexer(Tok)) + return; +} + +// Returns the offset after header guard directives and any comments +// before/after header guards. If no header guard is present in the code, this +// will return the offset after skipping all comments from the start of the +// code. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, StringRef Code, const FormatStyle &Style) { - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env->getSourceManager(); - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, - getFormattingLangOpts(Style)); - Token Tok; - // Get the first token. 
- Lex.LexFromRawLexer(Tok); - skipComments(Lex, Tok); - unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) - return SourceMgr.getFileOffset(Tok.getLocation()); + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { + skipComments(Lex, Tok); + if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) + return SM.getFileOffset(Tok.getLocation()); + } + return InitialOffset; + }); +} + +// Check if a sequence of tokens is like +// "#include ("header.h" | <header.h>)". +// If it is, \p Tok will be the token after this directive; otherwise, it can be +// any token after the given \p Tok (including \p Tok). +bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { + auto Matched = [&]() { + Lex.LexFromRawLexer(Tok); + return true; + }; + if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { + if (Lex.LexFromRawLexer(Tok)) + return false; + if (Tok.is(tok::string_literal)) + return Matched(); + if (Tok.is(tok::less)) { + while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { + } + if (Tok.is(tok::greater)) + return Matched(); + } } - return AfterComments; + return false; +} + +// Returns the offset of the last #include directive after which a new +// #include can be inserted. This ignores #include's after the #include block(s) +// in the beginning of a file to avoid inserting headers into code sections +// where new #include's should not be added by default. +// These code sections include: +// - raw string literals (containing #include). +// - #if blocks. +// - Special #include's among declarations (e.g. 
functions). +// +// If no #include after which a new #include can be inserted, this returns the +// offset after skipping all comments from the start of the code. +// Inserting after an #include is not allowed if it comes after code that is not +// #include (e.g. pre-processing directive that is not #include, declarations). +unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, + const FormatStyle &Style) { + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); + while (checkAndConsumeInclusiveDirective(Lex, Tok)) + MaxOffset = SM.getFileOffset(Tok.getLocation()); + return MaxOffset; + }); +} + +bool isDeletedHeader(llvm::StringRef HeaderName, + const std::set<llvm::StringRef> &HeadersToDelete) { + return HeadersToDelete.count(HeaderName) || + HeadersToDelete.count(HeaderName.trim("\"<>")); } -// FIXME: we also need to insert a '\n' at the end of the code if we have an -// insertion with offset Code.size(), and there is no '\n' at the end of the -// code. -// FIXME: do not insert headers into conditional #include blocks, e.g. #includes -// surrounded by compile condition "#if...". // FIXME: insert empty lines between newly created blocks. tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, @@ -1498,20 +1646,25 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, return Replaces; tooling::Replacements HeaderInsertions; + std::set<llvm::StringRef> HeadersToDelete; + tooling::Replacements Result; for (const auto &R : Replaces) { - if (isHeaderInsertion(R)) - HeaderInsertions.insert(R); - else if (R.getOffset() == UINT_MAX) + if (isHeaderInsertion(R)) { + // Replacements from \p Replaces must be conflict-free already, so we can + // simply consume the error. 
+ llvm::consumeError(HeaderInsertions.add(R)); + } else if (isHeaderDeletion(R)) { + HeadersToDelete.insert(R.getReplacementText()); + } else if (R.getOffset() == UINT_MAX) { llvm::errs() << "Insertions other than header #include insertion are " "not supported! " << R.getReplacementText() << "\n"; + } else { + llvm::consumeError(Result.add(R)); + } } - if (HeaderInsertions.empty()) + if (HeaderInsertions.empty() && HeadersToDelete.empty()) return Replaces; - tooling::Replacements Result; - std::set_difference(Replaces.begin(), Replaces.end(), - HeaderInsertions.begin(), HeaderInsertions.end(), - std::inserter(Result, Result.begin())); llvm::Regex IncludeRegex(IncludeRegexPattern); llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)"); @@ -1532,6 +1685,10 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, unsigned MinInsertOffset = getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); StringRef TrimmedCode = Code.drop_front(MinInsertOffset); + // Max insertion offset in the original code. + unsigned MaxInsertOffset = + MinInsertOffset + + getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style); SmallVector<StringRef, 32> Lines; TrimmedCode.split(Lines, '\n'); unsigned Offset = MinInsertOffset; @@ -1540,13 +1697,30 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, for (auto Line : Lines) { NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); if (IncludeRegex.match(Line, &Matches)) { + // The header name with quotes or angle brackets. StringRef IncludeName = Matches[2]; ExistingIncludes.insert(IncludeName); - int Category = Categories.getIncludePriority( - IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = NextLineOffset; - if (FirstIncludeOffset < 0) - FirstIncludeOffset = Offset; + // Only record the offset of current #include if we can insert after it. 
+ if (Offset <= MaxInsertOffset) { + int Category = Categories.getIncludePriority( + IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); + CategoryEndOffsets[Category] = NextLineOffset; + if (FirstIncludeOffset < 0) + FirstIncludeOffset = Offset; + } + if (isDeletedHeader(IncludeName, HeadersToDelete)) { + // If this is the last line without trailing newline, we need to make + // sure we don't delete across the file boundary. + unsigned Length = std::min(Line.size() + 1, Code.size() - Offset); + llvm::Error Err = + Result.add(tooling::Replacement(FileName, Offset, Length, "")); + if (Err) { + // Ignore the deletion on conflict. + llvm::errs() << "Failed to add header deletion replacement for " + << IncludeName << ": " << llvm::toString(std::move(Err)) + << "\n"; + } + } } Offset = NextLineOffset; } @@ -1570,6 +1744,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; + bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n'; for (const auto &R : HeaderInsertions) { auto IncludeDirective = R.getReplacementText(); bool Matched = IncludeRegex.match(IncludeDirective, &Matches); @@ -1588,7 +1763,20 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, std::string NewInclude = !IncludeDirective.endswith("\n") ? (IncludeDirective + "\n").str() : IncludeDirective.str(); - Result.insert(tooling::Replacement(FileName, Offset, 0, NewInclude)); + // When inserting headers at end of the code, also append '\n' to the code + // if it does not end with '\n'. 
+ if (NeedNewLineAtEnd && Offset == Code.size()) { + NewInclude = "\n" + NewInclude; + NeedNewLineAtEnd = false; + } + auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude); + auto Err = Result.add(NewReplace); + if (Err) { + llvm::consumeError(std::move(Err)); + unsigned NewOffset = Result.getShiftedCodePosition(Offset); + NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude); + Result = Result.merge(tooling::Replacements(NewReplace)); + } } return Result; } @@ -1611,18 +1799,6 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, return processReplacements(Cleanup, Code, NewReplaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM, - FileID ID, ArrayRef<CharSourceRange> Ranges, - bool *IncompleteFormat) { - FormatStyle Expanded = expandPresets(Style); - if (Expanded.DisableFormat) - return tooling::Replacements(); - - Environment Env(SM, ID, Ranges); - Formatter Format(Env, Expanded, IncompleteFormat); - return Format.process(); -} - tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName, bool *IncompleteFormat) { @@ -1630,19 +1806,28 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, if (Expanded.DisableFormat) return tooling::Replacements(); - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + + if (Style.Language == FormatStyle::LK_JavaScript && + Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { + JavaScriptRequoter Requoter(*Env, Expanded); + tooling::Replacements Requotes = Requoter.process(); + if (!Requotes.empty()) { + auto NewCode = applyAllReplacements(Code, Requotes); + if (NewCode) { + auto NewEnv = Environment::CreateVirtualEnvironment( + *NewCode, FileName, + tooling::calculateRangesAfterReplacements(Requotes, Ranges)); + 
Formatter Format(*NewEnv, Expanded, IncompleteFormat); + return Requotes.merge(Format.process()); + } + } + } + Formatter Format(*Env, Expanded, IncompleteFormat); return Format.process(); } -tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM, - FileID ID, ArrayRef<CharSourceRange> Ranges) { - Environment Env(SM, ID, Ranges); - Cleaner Clean(Env, Style); - return Clean.process(); -} - tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName) { @@ -1684,6 +1869,8 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Java; if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) return FormatStyle::LK_JavaScript; // JavaScript or TypeScript. + if (FileName.endswith(".m") || FileName.endswith(".mm")) + return FormatStyle::LK_ObjC; if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) return FormatStyle::LK_Proto; @@ -1693,12 +1880,21 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { } FormatStyle getStyle(StringRef StyleName, StringRef FileName, - StringRef FallbackStyle, vfs::FileSystem *FS) { + StringRef FallbackStyle, StringRef Code, + vfs::FileSystem *FS) { if (!FS) { FS = vfs::getRealFileSystem().get(); } FormatStyle Style = getLLVMStyle(); Style.Language = getLanguageByFileName(FileName); + + // This is a very crude detection of whether a header contains ObjC code that + // should be improved over time and probably be done on tokens, not one the + // bare content of the file. 
+ if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && + (Code.contains("\n- (") || Code.contains("\n+ ("))) + Style.Language = FormatStyle::LK_ObjC; + if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { llvm::errs() << "Invalid fallback style \"" << FallbackStyle << "\" using LLVM style\n"; @@ -1724,7 +1920,11 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, // Look for .clang-format/_clang-format file in the file's parent directories. SmallString<128> UnsuitableConfigFiles; SmallString<128> Path(FileName); - llvm::sys::fs::make_absolute(Path); + if (std::error_code EC = FS->makeAbsolute(Path)) { + llvm::errs() << EC.message() << "\n"; + return Style; + } + for (StringRef Directory = Path; !Directory.empty(); Directory = llvm::sys::path::parent_path(Directory)) { diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index 2ae4ddcfd08a0..ba5bf03a63464 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -13,9 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "ContinuationIndenter.h" #include "FormatToken.h" -#include "clang/Format/Format.h" +#include "ContinuationIndenter.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include <climits> @@ -78,6 +77,9 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, if (State.NextToken == nullptr || !State.NextToken->Previous) return 0; + if (Formats.size() == 1) + return 0; // Handled by formatFromToken + // Ensure that we start on the opening brace. const FormatToken *LBrace = State.NextToken->Previous->getPreviousNonComment(); @@ -93,6 +95,7 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, // Find the best ColumnFormat, i.e. the best number of columns to use. const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); + // If no ColumnFormat can be used, the braced list would generally be // bin-packed. 
Add a severe penalty to this so that column layouts are // preferred if possible. @@ -130,7 +133,9 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State, unsigned CommaSeparatedList::formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) { - if (HasNestedBracedList) + // Formatting with 1 Column isn't really a column layout, so we don't need the + // special logic here. We can just avoid bin packing any of the parameters. + if (Formats.size() == 1 || HasNestedBracedList) State.Stack.back().AvoidBinPacking = true; return 0; } @@ -274,7 +279,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { continue; // Ignore layouts that are bound to violate the column limit. - if (Format.TotalWidth > Style.ColumnLimit) + if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) continue; Formats.push_back(Format); @@ -288,7 +293,7 @@ CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { I = Formats.rbegin(), E = Formats.rend(); I != E; ++I) { - if (I->TotalWidth <= RemainingCharacters) { + if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) { if (BestFormat && I->LineCount > BestFormat->LineCount) break; BestFormat = &*I; diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 43b1625136201..ea3bbe368d5b0 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -396,6 +396,21 @@ struct FormatToken { } } + /// \brief Returns \c true if this is a string literal that's like a label, + /// e.g. ends with "=" or ":". 
+ bool isLabelString() const { + if (!is(tok::string_literal)) + return false; + StringRef Content = TokenText; + if (Content.startswith("\"") || Content.startswith("'")) + Content = Content.drop_front(1); + if (Content.endswith("\"") || Content.endswith("'")) + Content = Content.drop_back(1); + Content = Content.trim(); + return Content.size() > 1 && + (Content.back() == ':' || Content.back() == '='); + } + /// \brief Returns actual token start location without leading escaped /// newlines and whitespace. /// @@ -580,12 +595,14 @@ struct AdditionalKeywords { kw_as = &IdentTable.get("as"); kw_async = &IdentTable.get("async"); kw_await = &IdentTable.get("await"); + kw_declare = &IdentTable.get("declare"); kw_finally = &IdentTable.get("finally"); kw_from = &IdentTable.get("from"); kw_function = &IdentTable.get("function"); kw_import = &IdentTable.get("import"); kw_is = &IdentTable.get("is"); kw_let = &IdentTable.get("let"); + kw_module = &IdentTable.get("module"); kw_type = &IdentTable.get("type"); kw_var = &IdentTable.get("var"); kw_yield = &IdentTable.get("yield"); @@ -632,12 +649,14 @@ struct AdditionalKeywords { IdentifierInfo *kw_as; IdentifierInfo *kw_async; IdentifierInfo *kw_await; + IdentifierInfo *kw_declare; IdentifierInfo *kw_finally; IdentifierInfo *kw_from; IdentifierInfo *kw_function; IdentifierInfo *kw_import; IdentifierInfo *kw_is; IdentifierInfo *kw_let; + IdentifierInfo *kw_module; IdentifierInfo *kw_type; IdentifierInfo *kw_var; IdentifierInfo *kw_yield; diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 9778f84732d64..46a32a917dd93 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -26,12 +26,11 @@ namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, const FormatStyle &Style, encoding::Encoding Encoding) - : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false), - LessStashed(false), Column(0), TrailingWhitespace(0), - 
SourceMgr(SourceMgr), ID(ID), Style(Style), - IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), - Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), - MacroBlockBeginRegex(Style.MacroBlockBegin), + : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), + Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Style(Style), IdentTable(getFormattingLangOpts(Style)), + Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), + FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts(Style))); @@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { Tokens.push_back(getNextToken()); if (Style.Language == FormatStyle::LK_JavaScript) { tryParseJSRegexLiteral(); - tryParseTemplateString(); + handleTemplateStrings(); } tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) @@ -228,17 +227,44 @@ void FormatTokenLexer::tryParseJSRegexLiteral() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } -void FormatTokenLexer::tryParseTemplateString() { +void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); - if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`") + + if (BacktickToken->is(tok::l_brace)) { + StateStack.push(LexerState::NORMAL); return; + } + if (BacktickToken->is(tok::r_brace)) { + if (StateStack.size() == 1) + return; + StateStack.pop(); + if (StateStack.top() != LexerState::TEMPLATE_STRING) + return; + // If back in TEMPLATE_STRING, fallthrough and continue parsing the + } else if (BacktickToken->is(tok::unknown) && + BacktickToken->TokenText == "`") { + StateStack.push(LexerState::TEMPLATE_STRING); + } else { + return; // Not actually a template + } // 'Manually' lex ahead in the current file buffer. 
const char *Offset = Lex->getBufferLocation(); const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`" - for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) { - if (*Offset == '\\') + for (; Offset != Lex->getBuffer().end(); ++Offset) { + if (Offset[0] == '`') { + StateStack.pop(); + break; + } + if (Offset[0] == '\\') { ++Offset; // Skip the escaped character. + } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' && + Offset[1] == '{') { + // '${' introduces an expression interpolation in the template string. + StateStack.push(LexerState::NORMAL); + ++Offset; + break; + } } StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); @@ -262,7 +288,10 @@ void FormatTokenLexer::tryParseTemplateString() { Style.TabWidth, Encoding); } - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1))); + SourceLocation loc = Offset < Lex->getBuffer().end() + ? Lex->getSourceLocation(Offset + 1) + : SourceMgr.getLocForEndOfFile(ID); + resetLexer(SourceMgr.getFileOffset(loc)); } bool FormatTokenLexer::tryMerge_TMacro() { @@ -384,12 +413,8 @@ FormatToken *FormatTokenLexer::getStashedToken() { } FormatToken *FormatTokenLexer::getNextToken() { - if (GreaterStashed) { - GreaterStashed = false; - return getStashedToken(); - } - if (LessStashed) { - LessStashed = false; + if (StateStack.top() == LexerState::TOKEN_STASHED) { + StateStack.pop(); return getStashedToken(); } @@ -500,11 +525,13 @@ FormatToken *FormatTokenLexer::getNextToken() { } else if (FormatTok->Tok.is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - GreaterStashed = true; + ++Column; + StateStack.push(LexerState::TOKEN_STASHED); } else if (FormatTok->Tok.is(tok::lessless)) { FormatTok->Tok.setKind(tok::less); FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - LessStashed = true; + ++Column; + StateStack.push(LexerState::TOKEN_STASHED); } // Now FormatTok is the next 
non-whitespace token. @@ -531,7 +558,8 @@ FormatToken *FormatTokenLexer::getNextToken() { Column = FormatTok->LastLineColumnWidth; } - if (Style.Language == FormatStyle::LK_Cpp) { + if (Style.Language == FormatStyle::LK_Cpp || + Style.Language == FormatStyle::LK_ObjC) { if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index fa8c8882574f7..c47b0e725d366 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -23,9 +23,17 @@ #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" +#include <stack> + namespace clang { namespace format { +enum LexerState { + NORMAL, + TEMPLATE_STRING, + TOKEN_STASHED, +}; + class FormatTokenLexer { public: FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, @@ -53,7 +61,16 @@ private: // its text if successful. void tryParseJSRegexLiteral(); - void tryParseTemplateString(); + // Handles JavaScript template strings. + // + // JavaScript template strings use backticks ('`') as delimiters, and allow + // embedding expressions nested in ${expr-here}. Template strings can be + // nested recursively, i.e. expressions can contain template strings in turn. + // + // The code below parses starting from a backtick, up to a closing backtick or + // an opening ${. It also maintains a stack of lexing contexts to handle + // nested template parts by balancing curly braces. 
+ void handleTemplateStrings(); bool tryMerge_TMacro(); @@ -65,7 +82,7 @@ private: FormatToken *FormatTok; bool IsFirstToken; - bool GreaterStashed, LessStashed; + std::stack<LexerState> StateStack; unsigned Column; unsigned TrailingWhitespace; std::unique_ptr<Lexer> Lex; diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp index 32d5d756a3f09..e73695ca84770 100644 --- a/lib/Format/SortJavaScriptImports.cpp +++ b/lib/Format/SortJavaScriptImports.cpp @@ -1,4 +1,4 @@ -//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===// +//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "SortJavaScriptImports.h" -#include "SortJavaScriptImports.h" #include "TokenAnalyzer.h" #include "TokenAnnotator.h" #include "clang/Basic/Diagnostic.h" @@ -127,7 +126,8 @@ public: tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) override { + FormatTokenLexer &Tokens) override { + tooling::Replacements Result; AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); @@ -192,9 +192,15 @@ public: DEBUG(llvm::dbgs() << "Replacing imports:\n" << getSourceText(InsertionPoint) << "\nwith:\n" << ReferencesText << "\n"); - Result.insert(tooling::Replacement( + auto Err = Result.add(tooling::Replacement( Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), ReferencesText)); + // FIXME: better error handling. For now, just print error message and skip + // the replacement for the release version. 
+ if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } return Result; } @@ -276,16 +282,9 @@ private: SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { SmallVector<JsModuleReference, 16> References; SourceLocation Start; - bool FoundLines = false; AnnotatedLine *FirstNonImportLine = nullptr; + bool AnyImportAffected = false; for (auto Line : AnnotatedLines) { - if (!Line->Affected) { - // Only sort the first contiguous block of affected lines. - if (FoundLines) - break; - else - continue; - } Current = Line->First; LineEnd = Line->Last; skipComments(); @@ -294,15 +293,20 @@ private: // of the import that immediately follows them by using the previously // set Start. Start = Line->First->Tok.getLocation(); - if (!Current) - continue; // Only comments on this line. - FoundLines = true; + if (!Current) { + // Only comments on this line. Could be the first non-import line. + FirstNonImportLine = Line; + continue; + } JsModuleReference Reference; Reference.Range.setBegin(Start); if (!parseModuleReference(Keywords, Reference)) { - FirstNonImportLine = Line; + if (!FirstNonImportLine) + FirstNonImportLine = Line; // if no comment before. break; } + FirstNonImportLine = nullptr; + AnyImportAffected = AnyImportAffected || Line->Affected; Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); DEBUG({ llvm::dbgs() << "JsModuleReference: {" @@ -319,6 +323,9 @@ private: References.push_back(Reference); Start = SourceLocation(); } + // Sort imports if any import line was affected. + if (!AnyImportAffected) + References.clear(); return std::make_pair(References, FirstNonImportLine); } @@ -342,7 +349,6 @@ private: if (!parseModuleBindings(Keywords, Reference)) return false; - nextToken(); if (Current->is(Keywords.kw_from)) { // imports have a 'from' clause, exports might not. 
@@ -385,19 +391,28 @@ private: if (Current->isNot(tok::identifier)) return false; Reference.Prefix = Current->TokenText; + nextToken(); return true; } bool parseNamedBindings(const AdditionalKeywords &Keywords, JsModuleReference &Reference) { + if (Current->is(tok::identifier)) { + nextToken(); + if (Current->is(Keywords.kw_from)) + return true; + if (Current->isNot(tok::comma)) + return false; + nextToken(); // eat comma. + } if (Current->isNot(tok::l_brace)) return false; // {sym as alias, sym2 as ...} from '...'; - nextToken(); - while (true) { + while (Current->isNot(tok::r_brace)) { + nextToken(); if (Current->is(tok::r_brace)) - return true; + break; if (Current->isNot(tok::identifier)) return false; @@ -418,12 +433,11 @@ private: Symbol.Range.setEnd(Current->Tok.getLocation()); Reference.Symbols.push_back(Symbol); - if (Current->is(tok::r_brace)) - return true; - if (Current->isNot(tok::comma)) + if (!Current->isOneOf(tok::r_brace, tok::comma)) return false; - nextToken(); } + nextToken(); // consume r_brace + return true; } }; diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp index 89ac35f3e8423..f2e4e8ef08197 100644 --- a/lib/Format/TokenAnalyzer.cpp +++ b/lib/Format/TokenAnalyzer.cpp @@ -107,12 +107,12 @@ tooling::Replacements TokenAnalyzer::process() { } tooling::Replacements RunResult = - analyze(Annotator, AnnotatedLines, Tokens, Result); + analyze(Annotator, AnnotatedLines, Tokens); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; - for (tooling::Replacements::iterator I = RunResult.begin(), - E = RunResult.end(); + for (tooling::Replacements::const_iterator I = RunResult.begin(), + E = RunResult.end(); I != E; ++I) { llvm::dbgs() << I->toString() << "\n"; } @@ -120,7 +120,15 @@ tooling::Replacements TokenAnalyzer::process() { for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { delete AnnotatedLines[i]; } - Result.insert(RunResult.begin(), RunResult.end()); + for (const auto &R : RunResult) { + auto 
Err = Result.add(R); + // FIXME: better error handling here. For now, simply return an empty + // Replacements to indicate failure. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + return tooling::Replacements(); + } + } } return Result; } diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h index c1aa9c594fc3e..78a3d1bc8d9e5 100644 --- a/lib/Format/TokenAnalyzer.h +++ b/lib/Format/TokenAnalyzer.h @@ -31,8 +31,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" -#define DEBUG_TYPE "format-formatter" - namespace clang { namespace format { @@ -57,15 +55,12 @@ public: FileID getFileID() const { return ID; } - StringRef getFileName() const { return FileName; } - ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; } const SourceManager &getSourceManager() const { return SM; } private: FileID ID; - StringRef FileName; SmallVector<CharSourceRange, 8> CharRanges; SourceManager &SM; @@ -87,7 +82,7 @@ protected: virtual tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0; + FormatTokenLexer &Tokens) = 0; void consumeUnwrappedLine(const UnwrappedLine &TheLine) override; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 4a90522e6e31c..cf6373f456573 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -273,8 +273,9 @@ private: !CurrentToken->Next->HasUnescapedNewline && !CurrentToken->Next->isTrailingComment()) HasMultipleParametersOnALine = true; - if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || - CurrentToken->isSimpleTypeSpecifier()) + if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || + CurrentToken->Previous->isSimpleTypeSpecifier()) && + !CurrentToken->is(tok::l_brace)) Contexts.back().IsExpression = false; if (CurrentToken->isOneOf(tok::semi, tok::colon)) MightBeObjCForRangeLoop = false; @@ 
-305,8 +306,19 @@ private: FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; FormatToken *Parent = Left->getPreviousNonComment(); + + // Cases where '>' is followed by '['. + // In C++, this can happen either in array of templates (foo<int>[10]) + // or when array is a nested template type (unique_ptr<type1<type2>[]>). + bool CppArrayTemplates = + Style.Language == FormatStyle::LK_Cpp && Parent && + Parent->is(TT_TemplateCloser) && + (Contexts.back().CanBeExpression || Contexts.back().IsExpression || + Contexts.back().InTemplateArgument); + bool StartsObjCMethodExpr = - Style.Language == FormatStyle::LK_Cpp && + !CppArrayTemplates && (Style.Language == FormatStyle::LK_Cpp || + Style.Language == FormatStyle::LK_ObjC) && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && CurrentToken->isNot(tok::l_brace) && (!Parent || @@ -326,7 +338,7 @@ private: Parent->isOneOf(tok::l_brace, tok::comma)) { Left->Type = TT_JsComputedPropertyName; } else if (Style.Language == FormatStyle::LK_Proto || - (Parent && + (!CppArrayTemplates && Parent && Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, tok::comma, tok::l_paren, tok::l_square, tok::question, tok::colon, tok::kw_return, @@ -422,7 +434,8 @@ private: FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && (!Contexts.back().ColonIsDictLiteral || - Style.Language != FormatStyle::LK_Cpp)) || + (Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_ObjC))) || Style.Language == FormatStyle::LK_Proto) && (Previous->Tok.getIdentifierInfo() || Previous->is(tok::string_literal))) @@ -431,6 +444,9 @@ private: Style.Language == FormatStyle::LK_JavaScript) Left->Type = TT_DictLiteral; } + if (CurrentToken->is(tok::comma) && + Style.Language == FormatStyle::LK_JavaScript) + Left->Type = TT_DictLiteral; if (!consumeToken()) return false; } @@ -508,19 +524,29 @@ private: } else if 
(Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; - Tok->Previous->Type = TT_SelectorName; - if (Tok->Previous->ColumnWidth > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; - if (!Contexts.back().FirstObjCSelectorName) - Contexts.back().FirstObjCSelectorName = Tok->Previous; + const FormatToken *BeforePrevious = Tok->Previous->Previous; + if (!BeforePrevious || + !(BeforePrevious->is(TT_CastRParen) || + (BeforePrevious->is(TT_ObjCMethodExpr) && + BeforePrevious->is(tok::colon))) || + BeforePrevious->is(tok::r_square) || + Contexts.back().LongestObjCSelectorName == 0) { + Tok->Previous->Type = TT_SelectorName; + if (Tok->Previous->ColumnWidth > + Contexts.back().LongestObjCSelectorName) + Contexts.back().LongestObjCSelectorName = + Tok->Previous->ColumnWidth; + if (!Contexts.back().FirstObjCSelectorName) + Contexts.back().FirstObjCSelectorName = Tok->Previous; + } } else if (Contexts.back().ColonIsForRangeExpr) { Tok->Type = TT_RangeBasedForLoopColon; } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { Tok->Type = TT_BitFieldColon; } else if (Contexts.size() == 1 && !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { - if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept)) + if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, + tok::kw_noexcept)) Tok->Type = TT_CtorInitializerColon; else Tok->Type = TT_InheritanceColon; @@ -858,7 +884,8 @@ private: if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, - TT_RegexLiteral)) + TT_OverloadedOperator, TT_RegexLiteral, + TT_TemplateString)) CurrentToken->Type = TT_Unknown; CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; @@ -1037,12 +1064,17 @@ private: !Current.Next->isBinaryOperator() && !Current.Next->isOneOf(tok::semi, tok::colon, 
tok::l_brace, tok::period, tok::arrow, tok::coloncolon)) - if (FormatToken *BeforeParen = Current.MatchingParen->Previous) - if (BeforeParen->is(tok::identifier) && - BeforeParen->TokenText == BeforeParen->TokenText.upper() && - (!BeforeParen->Previous || - BeforeParen->Previous->ClosesTemplateDeclaration)) - Current.Type = TT_FunctionAnnotationRParen; + if (FormatToken *AfterParen = Current.MatchingParen->Next) { + // Make sure this isn't the return type of an Obj-C block declaration + if (AfterParen->Tok.isNot(tok::caret)) { + if (FormatToken *BeforeParen = Current.MatchingParen->Previous) + if (BeforeParen->is(tok::identifier) && + BeforeParen->TokenText == BeforeParen->TokenText.upper() && + (!BeforeParen->Previous || + BeforeParen->Previous->ClosesTemplateDeclaration)) + Current.Type = TT_FunctionAnnotationRParen; + } + } } else if (Current.is(tok::at) && Current.Next) { if (Current.Next->isStringLiteral()) { Current.Type = TT_ObjCStringLiteral; @@ -1144,6 +1176,7 @@ private: bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++ and Java. if (Style.Language != FormatStyle::LK_Cpp && + Style.Language != FormatStyle::LK_ObjC && Style.Language != FormatStyle::LK_Java) return false; @@ -1206,6 +1239,13 @@ private: if (!LeftOfParens) return false; + // Certain token types inside the parentheses mean that this can't be a + // cast. + for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; + Token = Token->Next) + if (Token->is(TT_BinaryOperator)) + return false; + // If the following token is an identifier or 'this', this is a cast. All // cases where this can be something else are handled above. 
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) @@ -1243,7 +1283,7 @@ private: const FormatToken *NextToken = Tok.getNextNonComment(); if (!NextToken || - NextToken->isOneOf(tok::arrow, Keywords.kw_final, + NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal, Keywords.kw_override) || (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) return TT_PointerOrReference; @@ -1303,7 +1343,13 @@ private: TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken || PrevToken->is(TT_CastRParen)) + if (!PrevToken) + return TT_UnaryOperator; + + if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) && + !PrevToken->is(tok::exclaim)) + // There aren't any trailing unary operators except for TypeScript's + // non-null operator (!). Thus, this must be squence of leading operators. return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -1560,6 +1606,13 @@ void TokenAnnotator::setCommentLineLevels( } } +static unsigned maxNestingDepth(const AnnotatedLine &Line) { + unsigned Result = 0; + for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next) + Result = std::max(Result, Tok->NestingLevel); + return Result; +} + void TokenAnnotator::annotate(AnnotatedLine &Line) { for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), E = Line.Children.end(); @@ -1568,6 +1621,14 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { } AnnotatingParser Parser(Style, Line, Keywords); Line.Type = Parser.parseLine(); + + // With very deep nesting, ExpressionParser uses lots of stack and the + // formatting algorithm is very slow. We're not going to do a good job here + // anyway - it's probably generated code being formatted by mistake. + // Just skip the whole line. 
+ if (maxNestingDepth(Line) > 50) + Line.Type = LT_Invalid; + if (Line.Type == LT_Invalid) return; @@ -1816,10 +1877,12 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 100; if (Left.is(TT_JsTypeColon)) return 35; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return 100; } - if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && - Right.Next->is(TT_DictLiteral))) + if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return 1; if (Right.is(tok::l_square)) { if (Style.Language == FormatStyle::LK_Proto) @@ -1935,20 +1998,24 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(TT_JavaAnnotation)) return 50; + if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && + Left.Previous->isLabelString() && + (Left.NextOperator || Left.OperatorIndex != 0)) + return 45; + if (Right.is(tok::plus) && Left.isLabelString() && + (Right.NextOperator || Right.OperatorIndex != 0)) + return 25; + if (Left.is(tok::comma)) + return 1; + if (Right.is(tok::lessless) && Left.isLabelString() && + (Right.NextOperator || Right.OperatorIndex != 1)) + return 25; if (Right.is(tok::lessless)) { - if (Left.is(tok::string_literal) && - (Right.NextOperator || Right.OperatorIndex != 1)) { - StringRef Content = Left.TokenText; - if (Content.startswith("\"")) - Content = Content.drop_front(1); - if (Content.endswith("\"")) - Content = Content.drop_back(1); - Content = Content.trim(); - if (Content.size() > 1 && - (Content.back() == ':' || Content.back() == '=')) - return 25; - } - return 1; // Breaking at a << is really cheap. + // Breaking at a << is really cheap. + if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) + // Slightly prefer to break before the first one in log-like statements. 
+ return 2; + return 1; } if (Left.is(TT_ConditionalExpr)) return prec::Conditional; @@ -1984,9 +2051,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Right.isOneOf(tok::semi, tok::comma)) return false; if (Right.is(tok::less) && - (Left.is(tok::kw_template) || - (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) + Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList) return true; + if (Right.is(tok::less) && Left.is(tok::kw_template)) + return Style.SpaceAfterTemplateKeyword; if (Left.isOneOf(tok::exclaim, tok::tilde)) return false; if (Left.is(tok::at) && @@ -2011,7 +2079,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.Previous->is(tok::r_paren)) || (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (Style.PointerAlignment != FormatStyle::PAS_Left || - Line.IsMultiVariableDeclStmt))); + (Line.IsMultiVariableDeclStmt && + (Left.NestingLevel == 0 || + (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || (Style.PointerAlignment != FormatStyle::PAS_Right && @@ -2113,13 +2183,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; + if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || + (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) + return false; + if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) + return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) return false; + if (Right.isOneOf(tok::l_brace, tok::l_square) && + Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) + return true; + // JS methods can use some keywords as names (e.g. `delete()`). 
+ if (Right.is(tok::l_paren) && Line.MustBeDeclaration && + Left.Tok.getIdentifierInfo()) + return false; if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, Keywords.kw_of, tok::kw_const) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; + if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && + Left.Previous->is(tok::period) && Right.is(tok::l_paren)) + return false; + if (Left.is(Keywords.kw_as) && + Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) + return true; if (Left.is(tok::kw_default) && Left.Previous && Left.Previous->is(tok::kw_export)) return true; @@ -2146,6 +2234,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, tok::r_square, tok::r_brace) || Left.Tok.isLiteral())) return false; + if (Left.is(tok::exclaim) && Right.is(Keywords.kw_as)) + return true; // "x! as string" } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; @@ -2369,7 +2459,12 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Keywords.kw_implements)) return true; } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.is(tok::kw_return)) + const FormatToken *NonComment = Right.getPreviousNonComment(); + if (Left.isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw) || + (NonComment && + NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, + tok::kw_throw))) return false; // Otherwise a semicolon is inserted. 
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; @@ -2383,6 +2478,18 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; if (Right.is(Keywords.kw_as)) return false; // must not break before as in 'x as type' casts + if (Left.is(Keywords.kw_declare) && + Right.isOneOf(Keywords.kw_module, tok::kw_namespace, + Keywords.kw_function, tok::kw_class, tok::kw_enum, + Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, + Keywords.kw_let, tok::kw_const)) + // See grammar for 'declare' statements at: + // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 + return false; + if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && + Right.isOneOf(tok::identifier, tok::string_literal)) { + return false; // must not break in "module foo { ...}" + } } if (Left.is(tok::at)) @@ -2415,10 +2522,13 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return !Style.BreakBeforeTernaryOperators; if (Right.is(TT_InheritanceColon)) return true; + if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && + Left.isNot(TT_SelectorName)) + return true; if (Right.is(tok::colon) && !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) return false; - if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))) + if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) return true; if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_ObjCMethodExpr))) @@ -2434,6 +2544,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return true; if (Right.is(TT_RangeBasedForLoopColon)) return false; + if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) + return true; if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || Left.is(tok::kw_operator)) return false; @@ -2522,7 +2634,8 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { << " 
FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; - llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; + llvm::errs() << " FakeRParens=" << Tok->FakeRParens; + llvm::errs() << " Text='" << Tok->TokenText << "'\n"; if (!Tok->Next) assert(Tok == Line.Last); Tok = Tok->Next; diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index baa68ded97408..97daaf44ba99e 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -18,7 +18,6 @@ #include "UnwrappedLineParser.h" #include "clang/Format/Format.h" -#include <string> namespace clang { class SourceManager; diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index 35035ea8afba1..d7f1c4232d860 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -10,6 +10,7 @@ #include "UnwrappedLineFormatter.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" +#include <queue> #define DEBUG_TYPE "format-formatter" @@ -150,7 +151,7 @@ public: MergedLines = 0; if (!DryRun) for (unsigned i = 0; i < MergedLines; ++i) - join(*Next[i], *Next[i + 1]); + join(*Next[0], *Next[i + 1]); Next = Next + MergedLines + 1; return Current; } diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 478617d6a88e2..7bcead9d25e1a 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -19,8 +19,6 @@ #include "ContinuationIndenter.h" #include "clang/Format/Format.h" #include <map> -#include <queue> -#include <string> namespace clang { namespace format { diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index 2fe72987bc7ce..84e06d05c739f 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -360,14 +360,15 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // BlockKind later if we 
parse a braced list (where all blocks // inside are by default braced lists), or when we explicitly detect // blocks (for example while parsing lambdas). - // - // We exclude + and - as they can be ObjC visibility modifiers. ProbablyBracedList = (Style.Language == FormatStyle::LK_JavaScript && - NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in)) || + NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, + Keywords.kw_as)) || NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, tok::l_square, tok::l_paren, tok::ellipsis) || + (NextTok->is(tok::identifier) && + !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || (NextTok->is(tok::semi) && (!ExpectClassBody || LBraceStack.size() != 1)) || (NextTok->isBinaryOperator() && !NextIsObjCMethod); @@ -668,19 +669,21 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords, // FIXME: This returns true for C/C++ keywords like 'struct'. return FormatTok->is(tok::identifier) && (FormatTok->Tok.getIdentifierInfo() == nullptr || - !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, - Keywords.kw_async, Keywords.kw_await, - Keywords.kw_yield, Keywords.kw_finally, - Keywords.kw_function, Keywords.kw_import, - Keywords.kw_is, Keywords.kw_let, Keywords.kw_var, - Keywords.kw_abstract, Keywords.kw_extends, - Keywords.kw_implements, Keywords.kw_instanceof, - Keywords.kw_interface, Keywords.kw_throws)); + !FormatTok->isOneOf( + Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, + Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, + Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, + Keywords.kw_let, Keywords.kw_var, tok::kw_const, + Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, + Keywords.kw_instanceof, Keywords.kw_interface, + Keywords.kw_throws)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { - return FormatTok->Tok.isLiteral() || mustBeJSIdent(Keywords, 
FormatTok); + return FormatTok->Tok.isLiteral() || + FormatTok->isOneOf(tok::kw_true, tok::kw_false) || + mustBeJSIdent(Keywords, FormatTok); } // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement @@ -724,6 +727,8 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { return; bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); + bool PreviousStartsTemplateExpr = + Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { // If the token before the previous one is an '@', the previous token is an // annotation and can precede another identifier/value. @@ -734,9 +739,12 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { if (Next->is(tok::exclaim) && PreviousMustBeValue) addUnwrappedLine(); bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); - if (NextMustBeValue && (PreviousMustBeValue || - Previous->isOneOf(tok::r_square, tok::r_paren, - tok::plusplus, tok::minusminus))) + bool NextEndsTemplateExpr = + Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); + if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && + (PreviousMustBeValue || + Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, + tok::minusminus))) addUnwrappedLine(); if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) addUnwrappedLine(); @@ -906,8 +914,8 @@ void UnwrappedLineParser::parseStructuralElement() { if (FormatTok->is(tok::colon)) { nextToken(); addUnwrappedLine(); + return; } - return; } // In all other cases, parse the declaration. break; @@ -1222,9 +1230,11 @@ void UnwrappedLineParser::tryToParseJSFunction() { // Consume "function". nextToken(); - // Consume * (generator function). - if (FormatTok->is(tok::star)) + // Consume * (generator function). Treat it like C++'s overloaded operators. 
+ if (FormatTok->is(tok::star)) { + FormatTok->Type = TT_OverloadedOperator; nextToken(); + } // Consume function name. if (FormatTok->is(tok::identifier)) diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 9cdba9df10a9e..b64506f39035f 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -42,11 +42,6 @@ WhitespaceManager::Change::Change( TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), StartOfBlockComment(nullptr), IndentationOffset(0) {} -void WhitespaceManager::reset() { - Changes.clear(); - Replaces.clear(); -} - void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned IndentLevel, unsigned Spaces, unsigned StartOfTokenColumn, @@ -432,7 +427,7 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, } assert(Shift >= 0); Changes[i].Spaces += Shift; - if (i + 1 != End) + if (i + 1 != Changes.size()) Changes[i + 1].PreviousEndOfTokenColumn += Shift; Changes[i].StartOfTokenColumn += Shift; } @@ -502,8 +497,14 @@ void WhitespaceManager::storeReplacement(SourceRange Range, if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), WhitespaceLength) == Text) return; - Replaces.insert(tooling::Replacement( + auto Err = Replaces.add(tooling::Replacement( SourceMgr, CharSourceRange::getCharRange(Range), Text)); + // FIXME: better error handling. For now, just print an error message in the + // release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } } void WhitespaceManager::appendNewlineText(std::string &Text, diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 3562347a0e60a..f42e371830b3d 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -41,9 +41,6 @@ public: bool UseCRLF) : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} - /// \brief Prepares the \c WhitespaceManager for another run. 
- void reset(); - /// \brief Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. void replaceWhitespace(FormatToken &Tok, unsigned Newlines, |