diff options
Diffstat (limited to 'lib/Format/Format.cpp')
-rw-r--r-- | lib/Format/Format.cpp | 498 |
1 files changed, 349 insertions, 149 deletions
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 32d6bb855ad6e..70b90d6fa14e0 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <memory> -#include <queue> #include <string> #define DEBUG_TYPE "format-formatter" @@ -53,6 +52,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); IO.enumCase(Value, "Java", FormatStyle::LK_Java); IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); + IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC); IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); } @@ -339,6 +339,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); + IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); @@ -420,7 +421,7 @@ std::error_code make_error_code(ParseError e) { return std::error_code(static_cast<int>(e), getParseCategory()); } -const char *ParseErrorCategory::name() const LLVM_NOEXCEPT { +const char *ParseErrorCategory::name() const noexcept { return "clang-format.parse_error"; } @@ -553,6 +554,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesInContainerLiterals = true; LLVMStyle.SpacesInCStyleCastParentheses = false; LLVMStyle.SpaceAfterCStyleCast = false; + LLVMStyle.SpaceAfterTemplateKeyword = true; LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpacesInAngles = false; @@ -609,10 +611,11 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { } else if (Language == FormatStyle::LK_JavaScript) { GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; GoogleStyle.AlignOperands = false; - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; + GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; - GoogleStyle.CommentPragmas = "@(export|requirecss|return|see|visibility) "; + GoogleStyle.CommentPragmas = + "(taze:|@(export|requirecss|return|returns|see|visibility)) "; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; @@ -621,6 +624,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { } else if (Language == FormatStyle::LK_Proto) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; GoogleStyle.SpacesInContainerLiterals = false; + } else if (Language == FormatStyle::LK_ObjC) { + GoogleStyle.ColumnLimit = 100; } return GoogleStyle; @@ -650,10 +655,12 @@ FormatStyle getMozillaStyle() { MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; MozillaStyle.AlwaysBreakAfterReturnType = - FormatStyle::RTBS_TopLevelDefinitions; + FormatStyle::RTBS_TopLevel; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; MozillaStyle.AlwaysBreakTemplateDeclarations = true; + MozillaStyle.BinPackParameters = false; + MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializersBeforeComma = true; MozillaStyle.ConstructorInitializerIndentWidth = 2; @@ -664,6 +671,7 @@ FormatStyle getMozillaStyle() { MozillaStyle.ObjCSpaceBeforeProtocolList = false; MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; + MozillaStyle.SpaceAfterTemplateKeyword = false; return MozillaStyle; } @@ -683,7 +691,6 @@ FormatStyle getWebKitStyle() { Style.ObjCBlockIndentWidth = 4; Style.ObjCSpaceAfterProperty = true; Style.PointerAlignment = FormatStyle::PAS_Left; - Style.Standard = FormatStyle::LS_Cpp03; return Style; } @@ -791,46 +798,25 @@ std::string configurationAsText(const FormatStyle &Style) { namespace { -class Formatter : public TokenAnalyzer { +class JavaScriptRequoter : public TokenAnalyzer { public: - Formatter(const Environment &Env, const FormatStyle &Style, - bool *IncompleteFormat) - : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) override { - deriveLocalStyle(AnnotatedLines); + FormatTokenLexer &Tokens) override { AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end()); - - if (Style.Language == FormatStyle::LK_JavaScript && - Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) - requoteJSStringLiteral(AnnotatedLines, Result); - - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.calculateFormattingInformation(*AnnotatedLines[i]); - } - - Annotator.setCommentLineLevels(AnnotatedLines); - - WhitespaceManager Whitespaces( - Env.getSourceManager(), Style, - inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); - ContinuationIndenter Indenter(Style, Tokens.getKeywords(), - Env.getSourceManager(), Whitespaces, Encoding, - BinPackInconclusiveFunctions); - UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - IncompleteFormat) - .format(AnnotatedLines); - return Whitespaces.generateReplacements(); + tooling::Replacements Result; + requoteJSStringLiteral(AnnotatedLines, Result); + return Result; } private: - // If the last token is a double/single-quoted string literal, generates a - // replacement with a single/double quoted string literal, re-escaping the - // contents in the process. + // Replaces double/single-quoted string literal as appropriate, re-escaping + // the contents in the process. void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result) { for (AnnotatedLine *Line : Lines) { @@ -842,8 +828,7 @@ private: StringRef Input = FormatTok->TokenText; if (FormatTok->Finalized || !FormatTok->isStringLiteral() || // NB: testing for not starting with a double quote to avoid - // breaking - // `template strings`. + // breaking `template strings`. (Style.JavaScriptQuotes == FormatStyle::JSQS_Single && !Input.startswith("\"")) || (Style.JavaScriptQuotes == FormatStyle::JSQS_Double && @@ -855,15 +840,20 @@ private: SourceLocation Start = FormatTok->Tok.getLocation(); auto Replace = [&](SourceLocation Start, unsigned Length, StringRef ReplacementText) { - Result.insert(tooling::Replacement(Env.getSourceManager(), Start, - Length, ReplacementText)); + auto Err = Result.add(tooling::Replacement( + Env.getSourceManager(), Start, Length, ReplacementText)); + // FIXME: handle error. For now, print error message and skip the + // replacement for release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } }; Replace(Start, 1, IsSingle ? "'" : "\""); Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1, IsSingle ? "'" : "\""); // Escape internal quotes. - size_t ColumnWidth = FormatTok->TokenText.size(); bool Escaped = false; for (size_t i = 1; i < Input.size() - 1; i++) { switch (Input[i]) { @@ -873,7 +863,6 @@ private: (!IsSingle && Input[i + 1] == '\''))) { // Remove this \, it's escaping a " or ' that no longer needs // escaping - ColumnWidth--; Replace(Start.getLocWithOffset(i), 1, ""); continue; } @@ -884,7 +873,6 @@ private: if (!Escaped && IsSingle == (Input[i] == '\'')) { // Escape the quote. Replace(Start.getLocWithOffset(i), 0, "\\"); - ColumnWidth++; } Escaped = false; break; @@ -893,16 +881,46 @@ private: break; } } - - // For formatting, count the number of non-escaped single quotes in them - // and adjust ColumnWidth to take the added escapes into account. - // FIXME(martinprobst): this might conflict with code breaking a long - // string literal (which clang-format doesn't do, yet). For that to - // work, this code would have to modify TokenText directly. - FormatTok->ColumnWidth = ColumnWidth; } } } +}; + +class Formatter : public TokenAnalyzer { +public: + Formatter(const Environment &Env, const FormatStyle &Style, + bool *IncompleteFormat) + : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + + tooling::Replacements + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + tooling::Replacements Result; + deriveLocalStyle(AnnotatedLines); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), + AnnotatedLines.end()); + for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { + Annotator.calculateFormattingInformation(*AnnotatedLines[i]); + } + Annotator.setCommentLineLevels(AnnotatedLines); + + WhitespaceManager Whitespaces( + Env.getSourceManager(), Style, + inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); + ContinuationIndenter Indenter(Style, Tokens.getKeywords(), + Env.getSourceManager(), Whitespaces, Encoding, + BinPackInconclusiveFunctions); + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), + IncompleteFormat) + .format(AnnotatedLines); + for (const auto &R : Whitespaces.generateReplacements()) + if (Result.add(R)) + return Result; + return Result; + } + +private: static bool inputUsesCRLF(StringRef Text) { return Text.count('\r') * 2 > Text.count('\n'); @@ -991,7 +1009,7 @@ public: tooling::Replacements analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens, tooling::Replacements &Result) override { + FormatTokenLexer &Tokens) override { // FIXME: in the current implementation the granularity of affected range // is an annotated line. However, this is not sufficient. Furthermore, // redundant code introduced by replacements does not necessarily @@ -1008,8 +1026,11 @@ public: if (Line->Affected) { cleanupRight(Line->First, tok::comma, tok::comma); cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma); + cleanupRight(Line->First, tok::l_paren, tok::comma); + cleanupLeft(Line->First, tok::comma, tok::r_paren); cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace); cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace); + cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal); } } @@ -1027,11 +1048,12 @@ private: // Iterate through all lines and remove any empty (nested) namespaces. void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { + std::set<unsigned> DeletedLines; for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { auto &Line = *AnnotatedLines[i]; if (Line.startsWith(tok::kw_namespace) || Line.startsWith(tok::kw_inline, tok::kw_namespace)) { - checkEmptyNamespace(AnnotatedLines, i, i); + checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines); } } @@ -1049,7 +1071,8 @@ private: // sets \p NewLine to the last line checked. // Returns true if the current namespace is empty. bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - unsigned CurrentLine, unsigned &NewLine) { + unsigned CurrentLine, unsigned &NewLine, + std::set<unsigned> &DeletedLines) { unsigned InitLine = CurrentLine, End = AnnotatedLines.size(); if (Style.BraceWrapping.AfterNamespace) { // If the left brace is in a new line, we should consume it first so that @@ -1069,7 +1092,8 @@ private: if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) || AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline, tok::kw_namespace)) { - if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine)) + if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine, + DeletedLines)) return false; CurrentLine = NewLine; continue; @@ -1121,6 +1145,8 @@ private: break; if (Left->is(LK) && Right->is(RK)) { deleteToken(DeleteLeft ? Left : Right); + for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next) + deleteToken(Tok); // If the right token is deleted, we should keep the left token // unchanged and pair it with the new right token. if (!DeleteLeft) @@ -1164,7 +1190,14 @@ private: } auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(), Tokens[End]->Tok.getEndLoc()); - Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, "")); + auto Err = + Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, "")); + // FIXME: better error handling. for now just print error message and skip + // for the release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false && "Fixes must not conflict!"); + } Idx = End + 1; } @@ -1186,8 +1219,6 @@ private: // Tokens to be deleted. std::set<FormatToken *, FormatTokenLess> DeletedTokens; - // The line numbers of lines to be deleted. - std::set<unsigned> DeletedLines; }; struct IncludeDirective { @@ -1210,15 +1241,50 @@ static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start, return false; } -// Sorts a block of includes given by 'Includes' alphabetically adding the -// necessary replacement to 'Replaces'. 'Includes' must be in strict source -// order. +// Returns a pair (Index, OffsetToEOL) describing the position of the cursor +// before sorting/deduplicating. Index is the index of the include under the +// cursor in the original set of includes. If this include has duplicates, it is +// the index of the first of the duplicates as the others are going to be +// removed. OffsetToEOL describes the cursor's position relative to the end of +// its current line. +// If `Cursor` is not on any #include, `Index` will be UINT_MAX. +static std::pair<unsigned, unsigned> +FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes, + const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) { + unsigned CursorIndex = UINT_MAX; + unsigned OffsetToEOL = 0; + for (int i = 0, e = Includes.size(); i != e; ++i) { + unsigned Start = Includes[Indices[i]].Offset; + unsigned End = Start + Includes[Indices[i]].Text.size(); + if (!(Cursor >= Start && Cursor < End)) + continue; + CursorIndex = Indices[i]; + OffsetToEOL = End - Cursor; + // Put the cursor on the only remaining #include among the duplicate + // #includes. + while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text) + CursorIndex = i; + break; + } + return std::make_pair(CursorIndex, OffsetToEOL); +} + +// Sorts and deduplicate a block of includes given by 'Includes' alphabetically +// adding the necessary replacement to 'Replaces'. 'Includes' must be in strict +// source order. +// #include directives with the same text will be deduplicated, and only the +// first #include in the duplicate #includes remains. If the `Cursor` is +// provided and put on a deleted #include, it will be moved to the remaining +// #include in the duplicate #includes. static void sortCppIncludes(const FormatStyle &Style, - const SmallVectorImpl<IncludeDirective> &Includes, - ArrayRef<tooling::Range> Ranges, StringRef FileName, - tooling::Replacements &Replaces, unsigned *Cursor) { - if (!affectsRange(Ranges, Includes.front().Offset, - Includes.back().Offset + Includes.back().Text.size())) + const SmallVectorImpl<IncludeDirective> &Includes, + ArrayRef<tooling::Range> Ranges, StringRef FileName, + tooling::Replacements &Replaces, unsigned *Cursor) { + unsigned IncludesBeginOffset = Includes.front().Offset; + unsigned IncludesEndOffset = + Includes.back().Offset + Includes.back().Text.size(); + unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset; + if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset)) return; SmallVector<unsigned, 16> Indices; for (unsigned i = 0, e = Includes.size(); i != e; ++i) @@ -1228,37 +1294,45 @@ static void sortCppIncludes(const FormatStyle &Style, return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) < std::tie(Includes[RHSI].Category, Includes[RHSI].Filename); }); + // The index of the include on which the cursor will be put after + // sorting/deduplicating. + unsigned CursorIndex; + // The offset from cursor to the end of line. + unsigned CursorToEOLOffset; + if (Cursor) + std::tie(CursorIndex, CursorToEOLOffset) = + FindCursorIndex(Includes, Indices, *Cursor); + + // Deduplicate #includes. + Indices.erase(std::unique(Indices.begin(), Indices.end(), + [&](unsigned LHSI, unsigned RHSI) { + return Includes[LHSI].Text == Includes[RHSI].Text; + }), + Indices.end()); // If the #includes are out of order, we generate a single replacement fixing // the entire block. Otherwise, no replacement is generated. - if (std::is_sorted(Indices.begin(), Indices.end())) + if (Indices.size() == Includes.size() && + std::is_sorted(Indices.begin(), Indices.end())) return; std::string result; - bool CursorMoved = false; for (unsigned Index : Indices) { if (!result.empty()) result += "\n"; result += Includes[Index].Text; - - if (Cursor && !CursorMoved) { - unsigned Start = Includes[Index].Offset; - unsigned End = Start + Includes[Index].Text.size(); - if (*Cursor >= Start && *Cursor < End) { - *Cursor = Includes.front().Offset + result.size() + *Cursor - End; - CursorMoved = true; - } - } + if (Cursor && CursorIndex == Index) + *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset; } - // Sorting #includes shouldn't change their total number of characters. - // This would otherwise mess up 'Ranges'. - assert(result.size() == - Includes.back().Offset + Includes.back().Text.size() - - Includes.front().Offset); - - Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset, - result.size(), result)); + auto Err = Replaces.add(tooling::Replacement( + FileName, Includes.front().Offset, IncludesBlockSize, result)); + // FIXME: better error handling. For now, just skip the replacement for the + // release version. + if (Err) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + assert(false); + } } namespace { @@ -1403,14 +1477,13 @@ processReplacements(T ProcessFunc, StringRef Code, auto NewCode = applyAllReplacements(Code, Replaces); if (!NewCode) return NewCode.takeError(); - std::vector<tooling::Range> ChangedRanges = - tooling::calculateChangedRanges(Replaces); + std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges(); StringRef FileName = Replaces.begin()->getFilePath(); tooling::Replacements FormatReplaces = ProcessFunc(Style, *NewCode, ChangedRanges, FileName); - return mergeReplacements(Replaces, FormatReplaces); + return Replaces.merge(FormatReplaces); } llvm::Expected<tooling::Replacements> @@ -1441,14 +1514,31 @@ formatReplacements(StringRef Code, const tooling::Replacements &Replaces, namespace { inline bool isHeaderInsertion(const tooling::Replacement &Replace) { - return Replace.getOffset() == UINT_MAX && + return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 && llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); } -void skipComments(Lexer &Lex, Token &Tok) { - while (Tok.is(tok::comment)) - if (Lex.LexFromRawLexer(Tok)) - return; +inline bool isHeaderDeletion(const tooling::Replacement &Replace) { + return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; +} + +// Returns the offset after skipping a sequence of tokens, matched by \p +// GetOffsetAfterSequence, from the start of the code. +// \p GetOffsetAfterSequence should be a function that matches a sequence of +// tokens and returns an offset after the sequence. +unsigned getOffsetAfterTokenSequence( + StringRef FileName, StringRef Code, const FormatStyle &Style, + std::function<unsigned(const SourceManager &, Lexer &, Token &)> + GetOffsetAfterSequense) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + const SourceManager &SourceMgr = Env->getSourceManager(); + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, + getFormattingLangOpts(Style)); + Token Tok; + // Get the first token. + Lex.LexFromRawLexer(Tok); + return GetOffsetAfterSequense(SourceMgr, Lex, Tok); } // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, @@ -1464,32 +1554,90 @@ bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { return Matched; } +void skipComments(Lexer &Lex, Token &Tok) { + while (Tok.is(tok::comment)) + if (Lex.LexFromRawLexer(Tok)) + return; +} + +// Returns the offset after header guard directives and any comments +// before/after header guards. If no header guard presents in the code, this +// will returns the offset after skipping all comments from the start of the +// code. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, StringRef Code, const FormatStyle &Style) { - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env->getSourceManager(); - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, - getFormattingLangOpts(Style)); - Token Tok; - // Get the first token. - Lex.LexFromRawLexer(Tok); - skipComments(Lex, Tok); - unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) - return SourceMgr.getFileOffset(Tok.getLocation()); + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { + skipComments(Lex, Tok); + if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) + return SM.getFileOffset(Tok.getLocation()); + } + return InitialOffset; + }); +} + +// Check if a sequence of tokens is like +// "#include ("header.h" | <header.h>)". +// If it is, \p Tok will be the token after this directive; otherwise, it can be +// any token after the given \p Tok (including \p Tok). +bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { + auto Matched = [&]() { + Lex.LexFromRawLexer(Tok); + return true; + }; + if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { + if (Lex.LexFromRawLexer(Tok)) + return false; + if (Tok.is(tok::string_literal)) + return Matched(); + if (Tok.is(tok::less)) { + while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { + } + if (Tok.is(tok::greater)) + return Matched(); + } } - return AfterComments; + return false; +} + +// Returns the offset of the last #include directive after which a new +// #include can be inserted. This ignores #include's after the #include block(s) +// in the beginning of a file to avoid inserting headers into code sections +// where new #include's should not be added by default. +// These code sections include: +// - raw string literals (containing #include). +// - #if blocks. +// - Special #include's among declarations (e.g. functions). +// +// If no #include after which a new #include can be inserted, this returns the +// offset after skipping all comments from the start of the code. +// Inserting after an #include is not allowed if it comes after code that is not +// #include (e.g. pre-processing directive that is not #include, declarations). +unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, + const FormatStyle &Style) { + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); + while (checkAndConsumeInclusiveDirective(Lex, Tok)) + MaxOffset = SM.getFileOffset(Tok.getLocation()); + return MaxOffset; + }); +} + +bool isDeletedHeader(llvm::StringRef HeaderName, + const std::set<llvm::StringRef> &HeadersToDelete) { + return HeadersToDelete.count(HeaderName) || + HeadersToDelete.count(HeaderName.trim("\"<>")); } -// FIXME: we also need to insert a '\n' at the end of the code if we have an -// insertion with offset Code.size(), and there is no '\n' at the end of the -// code. -// FIXME: do not insert headers into conditional #include blocks, e.g. #includes -// surrounded by compile condition "#if...". // FIXME: insert empty lines between newly created blocks. tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, @@ -1498,20 +1646,25 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, return Replaces; tooling::Replacements HeaderInsertions; + std::set<llvm::StringRef> HeadersToDelete; + tooling::Replacements Result; for (const auto &R : Replaces) { - if (isHeaderInsertion(R)) - HeaderInsertions.insert(R); - else if (R.getOffset() == UINT_MAX) + if (isHeaderInsertion(R)) { + // Replacements from \p Replaces must be conflict-free already, so we can + // simply consume the error. + llvm::consumeError(HeaderInsertions.add(R)); + } else if (isHeaderDeletion(R)) { + HeadersToDelete.insert(R.getReplacementText()); + } else if (R.getOffset() == UINT_MAX) { llvm::errs() << "Insertions other than header #include insertion are " "not supported! " << R.getReplacementText() << "\n"; + } else { + llvm::consumeError(Result.add(R)); + } } - if (HeaderInsertions.empty()) + if (HeaderInsertions.empty() && HeadersToDelete.empty()) return Replaces; - tooling::Replacements Result; - std::set_difference(Replaces.begin(), Replaces.end(), - HeaderInsertions.begin(), HeaderInsertions.end(), - std::inserter(Result, Result.begin())); llvm::Regex IncludeRegex(IncludeRegexPattern); llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)"); @@ -1532,6 +1685,10 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, unsigned MinInsertOffset = getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); StringRef TrimmedCode = Code.drop_front(MinInsertOffset); + // Max insertion offset in the original code. + unsigned MaxInsertOffset = + MinInsertOffset + + getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style); SmallVector<StringRef, 32> Lines; TrimmedCode.split(Lines, '\n'); unsigned Offset = MinInsertOffset; @@ -1540,13 +1697,30 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, for (auto Line : Lines) { NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); if (IncludeRegex.match(Line, &Matches)) { + // The header name with quotes or angle brackets. StringRef IncludeName = Matches[2]; ExistingIncludes.insert(IncludeName); - int Category = Categories.getIncludePriority( - IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = NextLineOffset; - if (FirstIncludeOffset < 0) - FirstIncludeOffset = Offset; + // Only record the offset of current #include if we can insert after it. + if (Offset <= MaxInsertOffset) { + int Category = Categories.getIncludePriority( + IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); + CategoryEndOffsets[Category] = NextLineOffset; + if (FirstIncludeOffset < 0) + FirstIncludeOffset = Offset; + } + if (isDeletedHeader(IncludeName, HeadersToDelete)) { + // If this is the last line without trailing newline, we need to make + // sure we don't delete across the file boundary. + unsigned Length = std::min(Line.size() + 1, Code.size() - Offset); + llvm::Error Err = + Result.add(tooling::Replacement(FileName, Offset, Length, "")); + if (Err) { + // Ignore the deletion on conflict. + llvm::errs() << "Failed to add header deletion replacement for " + << IncludeName << ": " << llvm::toString(std::move(Err)) + << "\n"; + } + } } Offset = NextLineOffset; } @@ -1570,6 +1744,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; + bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n'; for (const auto &R : HeaderInsertions) { auto IncludeDirective = R.getReplacementText(); bool Matched = IncludeRegex.match(IncludeDirective, &Matches); @@ -1588,7 +1763,20 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, std::string NewInclude = !IncludeDirective.endswith("\n") ? (IncludeDirective + "\n").str() : IncludeDirective.str(); - Result.insert(tooling::Replacement(FileName, Offset, 0, NewInclude)); + // When inserting headers at end of the code, also append '\n' to the code + // if it does not end with '\n'. + if (NeedNewLineAtEnd && Offset == Code.size()) { + NewInclude = "\n" + NewInclude; + NeedNewLineAtEnd = false; + } + auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude); + auto Err = Result.add(NewReplace); + if (Err) { + llvm::consumeError(std::move(Err)); + unsigned NewOffset = Result.getShiftedCodePosition(Offset); + NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude); + Result = Result.merge(tooling::Replacements(NewReplace)); + } } return Result; } @@ -1611,18 +1799,6 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, return processReplacements(Cleanup, Code, NewReplaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM, - FileID ID, ArrayRef<CharSourceRange> Ranges, - bool *IncompleteFormat) { - FormatStyle Expanded = expandPresets(Style); - if (Expanded.DisableFormat) - return tooling::Replacements(); - - Environment Env(SM, ID, Ranges); - Formatter Format(Env, Expanded, IncompleteFormat); - return Format.process(); -} - tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName, bool *IncompleteFormat) { @@ -1630,19 +1806,28 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, if (Expanded.DisableFormat) return tooling::Replacements(); - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + + if (Style.Language == FormatStyle::LK_JavaScript && + Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) { + JavaScriptRequoter Requoter(*Env, Expanded); + tooling::Replacements Requotes = Requoter.process(); + if (!Requotes.empty()) { + auto NewCode = applyAllReplacements(Code, Requotes); + if (NewCode) { + auto NewEnv = Environment::CreateVirtualEnvironment( + *NewCode, FileName, + tooling::calculateRangesAfterReplacements(Requotes, Ranges)); + Formatter Format(*NewEnv, Expanded, IncompleteFormat); + return Requotes.merge(Format.process()); + } + } + } + Formatter Format(*Env, Expanded, IncompleteFormat); return Format.process(); } -tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM, - FileID ID, ArrayRef<CharSourceRange> Ranges) { - Environment Env(SM, ID, Ranges); - Cleaner Clean(Env, Style); - return Clean.process(); -} - tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName) { @@ -1684,6 +1869,8 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Java; if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) return FormatStyle::LK_JavaScript; // JavaScript or TypeScript. + if (FileName.endswith(".m") || FileName.endswith(".mm")) + return FormatStyle::LK_ObjC; if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) return FormatStyle::LK_Proto; @@ -1693,12 +1880,21 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { } FormatStyle getStyle(StringRef StyleName, StringRef FileName, - StringRef FallbackStyle, vfs::FileSystem *FS) { + StringRef FallbackStyle, StringRef Code, + vfs::FileSystem *FS) { if (!FS) { FS = vfs::getRealFileSystem().get(); } FormatStyle Style = getLLVMStyle(); Style.Language = getLanguageByFileName(FileName); + + // This is a very crude detection of whether a header contains ObjC code that + // should be improved over time and probably be done on tokens, not one the + // bare content of the file. + if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && + (Code.contains("\n- (") || Code.contains("\n+ ("))) + Style.Language = FormatStyle::LK_ObjC; + if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) { llvm::errs() << "Invalid fallback style \"" << FallbackStyle << "\" using LLVM style\n"; @@ -1724,7 +1920,11 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName, // Look for .clang-format/_clang-format file in the file's parent directories. SmallString<128> UnsuitableConfigFiles; SmallString<128> Path(FileName); - llvm::sys::fs::make_absolute(Path); + if (std::error_code EC = FS->makeAbsolute(Path)) { + llvm::errs() << EC.message() << "\n"; + return Style; + } + for (StringRef Directory = Path; !Directory.empty(); Directory = llvm::sys::path::parent_path(Directory)) { |