summaryrefslogtreecommitdiff
path: root/lib/Format/Format.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Format/Format.cpp')
-rw-r--r--lib/Format/Format.cpp498
1 files changed, 349 insertions, 149 deletions
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 32d6bb855ad6e..70b90d6fa14e0 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -36,7 +36,6 @@
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <memory>
-#include <queue>
#include <string>
#define DEBUG_TYPE "format-formatter"
@@ -53,6 +52,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
IO.enumCase(Value, "Java", FormatStyle::LK_Java);
IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
+ IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC);
IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
}
@@ -339,6 +339,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("ReflowComments", Style.ReflowComments);
IO.mapOptional("SortIncludes", Style.SortIncludes);
IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
+ IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword);
IO.mapOptional("SpaceBeforeAssignmentOperators",
Style.SpaceBeforeAssignmentOperators);
IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
@@ -420,7 +421,7 @@ std::error_code make_error_code(ParseError e) {
return std::error_code(static_cast<int>(e), getParseCategory());
}
-const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
+const char *ParseErrorCategory::name() const noexcept {
return "clang-format.parse_error";
}
@@ -553,6 +554,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpacesInContainerLiterals = true;
LLVMStyle.SpacesInCStyleCastParentheses = false;
LLVMStyle.SpaceAfterCStyleCast = false;
+ LLVMStyle.SpaceAfterTemplateKeyword = true;
LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
LLVMStyle.SpaceBeforeAssignmentOperators = true;
LLVMStyle.SpacesInAngles = false;
@@ -609,10 +611,11 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
} else if (Language == FormatStyle::LK_JavaScript) {
GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
GoogleStyle.AlignOperands = false;
- GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
+ GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
GoogleStyle.BreakBeforeTernaryOperators = false;
- GoogleStyle.CommentPragmas = "@(export|requirecss|return|see|visibility) ";
+ GoogleStyle.CommentPragmas =
+ "(taze:|@(export|requirecss|return|returns|see|visibility)) ";
GoogleStyle.MaxEmptyLinesToKeep = 3;
GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
GoogleStyle.SpacesInContainerLiterals = false;
@@ -621,6 +624,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
} else if (Language == FormatStyle::LK_Proto) {
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
GoogleStyle.SpacesInContainerLiterals = false;
+ } else if (Language == FormatStyle::LK_ObjC) {
+ GoogleStyle.ColumnLimit = 100;
}
return GoogleStyle;
@@ -650,10 +655,12 @@ FormatStyle getMozillaStyle() {
MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
MozillaStyle.AlwaysBreakAfterReturnType =
- FormatStyle::RTBS_TopLevelDefinitions;
+ FormatStyle::RTBS_TopLevel;
MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
FormatStyle::DRTBS_TopLevel;
MozillaStyle.AlwaysBreakTemplateDeclarations = true;
+ MozillaStyle.BinPackParameters = false;
+ MozillaStyle.BinPackArguments = false;
MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
MozillaStyle.BreakConstructorInitializersBeforeComma = true;
MozillaStyle.ConstructorInitializerIndentWidth = 2;
@@ -664,6 +671,7 @@ FormatStyle getMozillaStyle() {
MozillaStyle.ObjCSpaceBeforeProtocolList = false;
MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
+ MozillaStyle.SpaceAfterTemplateKeyword = false;
return MozillaStyle;
}
@@ -683,7 +691,6 @@ FormatStyle getWebKitStyle() {
Style.ObjCBlockIndentWidth = 4;
Style.ObjCSpaceAfterProperty = true;
Style.PointerAlignment = FormatStyle::PAS_Left;
- Style.Standard = FormatStyle::LS_Cpp03;
return Style;
}
@@ -791,46 +798,25 @@ std::string configurationAsText(const FormatStyle &Style) {
namespace {
-class Formatter : public TokenAnalyzer {
+class JavaScriptRequoter : public TokenAnalyzer {
public:
- Formatter(const Environment &Env, const FormatStyle &Style,
- bool *IncompleteFormat)
- : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {}
+ JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style) {}
tooling::Replacements
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
- deriveLocalStyle(AnnotatedLines);
+ FormatTokenLexer &Tokens) override {
AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
AnnotatedLines.end());
-
- if (Style.Language == FormatStyle::LK_JavaScript &&
- Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
- requoteJSStringLiteral(AnnotatedLines, Result);
-
- for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
- }
-
- Annotator.setCommentLineLevels(AnnotatedLines);
-
- WhitespaceManager Whitespaces(
- Env.getSourceManager(), Style,
- inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
- ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
- Env.getSourceManager(), Whitespaces, Encoding,
- BinPackInconclusiveFunctions);
- UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
- IncompleteFormat)
- .format(AnnotatedLines);
- return Whitespaces.generateReplacements();
+ tooling::Replacements Result;
+ requoteJSStringLiteral(AnnotatedLines, Result);
+ return Result;
}
private:
- // If the last token is a double/single-quoted string literal, generates a
- // replacement with a single/double quoted string literal, re-escaping the
- // contents in the process.
+ // Replaces double/single-quoted string literal as appropriate, re-escaping
+ // the contents in the process.
void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
tooling::Replacements &Result) {
for (AnnotatedLine *Line : Lines) {
@@ -842,8 +828,7 @@ private:
StringRef Input = FormatTok->TokenText;
if (FormatTok->Finalized || !FormatTok->isStringLiteral() ||
// NB: testing for not starting with a double quote to avoid
- // breaking
- // `template strings`.
+ // breaking `template strings`.
(Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
!Input.startswith("\"")) ||
(Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
@@ -855,15 +840,20 @@ private:
SourceLocation Start = FormatTok->Tok.getLocation();
auto Replace = [&](SourceLocation Start, unsigned Length,
StringRef ReplacementText) {
- Result.insert(tooling::Replacement(Env.getSourceManager(), Start,
- Length, ReplacementText));
+ auto Err = Result.add(tooling::Replacement(
+ Env.getSourceManager(), Start, Length, ReplacementText));
+ // FIXME: handle error. For now, print error message and skip the
+ // replacement for release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
};
Replace(Start, 1, IsSingle ? "'" : "\"");
Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
IsSingle ? "'" : "\"");
// Escape internal quotes.
- size_t ColumnWidth = FormatTok->TokenText.size();
bool Escaped = false;
for (size_t i = 1; i < Input.size() - 1; i++) {
switch (Input[i]) {
@@ -873,7 +863,6 @@ private:
(!IsSingle && Input[i + 1] == '\''))) {
// Remove this \, it's escaping a " or ' that no longer needs
// escaping
- ColumnWidth--;
Replace(Start.getLocWithOffset(i), 1, "");
continue;
}
@@ -884,7 +873,6 @@ private:
if (!Escaped && IsSingle == (Input[i] == '\'')) {
// Escape the quote.
Replace(Start.getLocWithOffset(i), 0, "\\");
- ColumnWidth++;
}
Escaped = false;
break;
@@ -893,16 +881,46 @@ private:
break;
}
}
-
- // For formatting, count the number of non-escaped single quotes in them
- // and adjust ColumnWidth to take the added escapes into account.
- // FIXME(martinprobst): this might conflict with code breaking a long
- // string literal (which clang-format doesn't do, yet). For that to
- // work, this code would have to modify TokenText directly.
- FormatTok->ColumnWidth = ColumnWidth;
}
}
}
+};
+
+class Formatter : public TokenAnalyzer {
+public:
+ Formatter(const Environment &Env, const FormatStyle &Style,
+ bool *IncompleteFormat)
+ : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override {
+ tooling::Replacements Result;
+ deriveLocalStyle(AnnotatedLines);
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
+ }
+ Annotator.setCommentLineLevels(AnnotatedLines);
+
+ WhitespaceManager Whitespaces(
+ Env.getSourceManager(), Style,
+ inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
+ ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
+ Env.getSourceManager(), Whitespaces, Encoding,
+ BinPackInconclusiveFunctions);
+ UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
+ IncompleteFormat)
+ .format(AnnotatedLines);
+ for (const auto &R : Whitespaces.generateReplacements())
+ if (Result.add(R))
+ return Result;
+ return Result;
+ }
+
+private:
static bool inputUsesCRLF(StringRef Text) {
return Text.count('\r') * 2 > Text.count('\n');
@@ -991,7 +1009,7 @@ public:
tooling::Replacements
analyze(TokenAnnotator &Annotator,
SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
+ FormatTokenLexer &Tokens) override {
// FIXME: in the current implementation the granularity of affected range
// is an annotated line. However, this is not sufficient. Furthermore,
// redundant code introduced by replacements does not necessarily
@@ -1008,8 +1026,11 @@ public:
if (Line->Affected) {
cleanupRight(Line->First, tok::comma, tok::comma);
cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma);
+ cleanupRight(Line->First, tok::l_paren, tok::comma);
+ cleanupLeft(Line->First, tok::comma, tok::r_paren);
cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace);
cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace);
+ cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal);
}
}
@@ -1027,11 +1048,12 @@ private:
// Iterate through all lines and remove any empty (nested) namespaces.
void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ std::set<unsigned> DeletedLines;
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
auto &Line = *AnnotatedLines[i];
if (Line.startsWith(tok::kw_namespace) ||
Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
- checkEmptyNamespace(AnnotatedLines, i, i);
+ checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines);
}
}
@@ -1049,7 +1071,8 @@ private:
// sets \p NewLine to the last line checked.
// Returns true if the current namespace is empty.
bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- unsigned CurrentLine, unsigned &NewLine) {
+ unsigned CurrentLine, unsigned &NewLine,
+ std::set<unsigned> &DeletedLines) {
unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
if (Style.BraceWrapping.AfterNamespace) {
// If the left brace is in a new line, we should consume it first so that
@@ -1069,7 +1092,8 @@ private:
if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
tok::kw_namespace)) {
- if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine))
+ if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine,
+ DeletedLines))
return false;
CurrentLine = NewLine;
continue;
@@ -1121,6 +1145,8 @@ private:
break;
if (Left->is(LK) && Right->is(RK)) {
deleteToken(DeleteLeft ? Left : Right);
+ for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next)
+ deleteToken(Tok);
// If the right token is deleted, we should keep the left token
// unchanged and pair it with the new right token.
if (!DeleteLeft)
@@ -1164,7 +1190,14 @@ private:
}
auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
Tokens[End]->Tok.getEndLoc());
- Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, ""));
+ auto Err =
+ Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, ""));
+ // FIXME: better error handling. for now just print error message and skip
+ // for the release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false && "Fixes must not conflict!");
+ }
Idx = End + 1;
}
@@ -1186,8 +1219,6 @@ private:
// Tokens to be deleted.
std::set<FormatToken *, FormatTokenLess> DeletedTokens;
- // The line numbers of lines to be deleted.
- std::set<unsigned> DeletedLines;
};
struct IncludeDirective {
@@ -1210,15 +1241,50 @@ static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
return false;
}
-// Sorts a block of includes given by 'Includes' alphabetically adding the
-// necessary replacement to 'Replaces'. 'Includes' must be in strict source
-// order.
+// Returns a pair (Index, OffsetToEOL) describing the position of the cursor
+// before sorting/deduplicating. Index is the index of the include under the
+// cursor in the original set of includes. If this include has duplicates, it is
+// the index of the first of the duplicates as the others are going to be
+// removed. OffsetToEOL describes the cursor's position relative to the end of
+// its current line.
+// If `Cursor` is not on any #include, `Index` will be UINT_MAX.
+static std::pair<unsigned, unsigned>
+FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes,
+ const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) {
+ unsigned CursorIndex = UINT_MAX;
+ unsigned OffsetToEOL = 0;
+ for (int i = 0, e = Includes.size(); i != e; ++i) {
+ unsigned Start = Includes[Indices[i]].Offset;
+ unsigned End = Start + Includes[Indices[i]].Text.size();
+ if (!(Cursor >= Start && Cursor < End))
+ continue;
+ CursorIndex = Indices[i];
+ OffsetToEOL = End - Cursor;
+ // Put the cursor on the only remaining #include among the duplicate
+ // #includes.
+ while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text)
+ CursorIndex = i;
+ break;
+ }
+ return std::make_pair(CursorIndex, OffsetToEOL);
+}
+
+// Sorts and deduplicate a block of includes given by 'Includes' alphabetically
+// adding the necessary replacement to 'Replaces'. 'Includes' must be in strict
+// source order.
+// #include directives with the same text will be deduplicated, and only the
+// first #include in the duplicate #includes remains. If the `Cursor` is
+// provided and put on a deleted #include, it will be moved to the remaining
+// #include in the duplicate #includes.
static void sortCppIncludes(const FormatStyle &Style,
- const SmallVectorImpl<IncludeDirective> &Includes,
- ArrayRef<tooling::Range> Ranges, StringRef FileName,
- tooling::Replacements &Replaces, unsigned *Cursor) {
- if (!affectsRange(Ranges, Includes.front().Offset,
- Includes.back().Offset + Includes.back().Text.size()))
+ const SmallVectorImpl<IncludeDirective> &Includes,
+ ArrayRef<tooling::Range> Ranges, StringRef FileName,
+ tooling::Replacements &Replaces, unsigned *Cursor) {
+ unsigned IncludesBeginOffset = Includes.front().Offset;
+ unsigned IncludesEndOffset =
+ Includes.back().Offset + Includes.back().Text.size();
+ unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset;
+ if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset))
return;
SmallVector<unsigned, 16> Indices;
for (unsigned i = 0, e = Includes.size(); i != e; ++i)
@@ -1228,37 +1294,45 @@ static void sortCppIncludes(const FormatStyle &Style,
return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
});
+ // The index of the include on which the cursor will be put after
+ // sorting/deduplicating.
+ unsigned CursorIndex;
+ // The offset from cursor to the end of line.
+ unsigned CursorToEOLOffset;
+ if (Cursor)
+ std::tie(CursorIndex, CursorToEOLOffset) =
+ FindCursorIndex(Includes, Indices, *Cursor);
+
+ // Deduplicate #includes.
+ Indices.erase(std::unique(Indices.begin(), Indices.end(),
+ [&](unsigned LHSI, unsigned RHSI) {
+ return Includes[LHSI].Text == Includes[RHSI].Text;
+ }),
+ Indices.end());
// If the #includes are out of order, we generate a single replacement fixing
// the entire block. Otherwise, no replacement is generated.
- if (std::is_sorted(Indices.begin(), Indices.end()))
+ if (Indices.size() == Includes.size() &&
+ std::is_sorted(Indices.begin(), Indices.end()))
return;
std::string result;
- bool CursorMoved = false;
for (unsigned Index : Indices) {
if (!result.empty())
result += "\n";
result += Includes[Index].Text;
-
- if (Cursor && !CursorMoved) {
- unsigned Start = Includes[Index].Offset;
- unsigned End = Start + Includes[Index].Text.size();
- if (*Cursor >= Start && *Cursor < End) {
- *Cursor = Includes.front().Offset + result.size() + *Cursor - End;
- CursorMoved = true;
- }
- }
+ if (Cursor && CursorIndex == Index)
+ *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;
}
- // Sorting #includes shouldn't change their total number of characters.
- // This would otherwise mess up 'Ranges'.
- assert(result.size() ==
- Includes.back().Offset + Includes.back().Text.size() -
- Includes.front().Offset);
-
- Replaces.insert(tooling::Replacement(FileName, Includes.front().Offset,
- result.size(), result));
+ auto Err = Replaces.add(tooling::Replacement(
+ FileName, Includes.front().Offset, IncludesBlockSize, result));
+ // FIXME: better error handling. For now, just skip the replacement for the
+ // release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
}
namespace {
@@ -1403,14 +1477,13 @@ processReplacements(T ProcessFunc, StringRef Code,
auto NewCode = applyAllReplacements(Code, Replaces);
if (!NewCode)
return NewCode.takeError();
- std::vector<tooling::Range> ChangedRanges =
- tooling::calculateChangedRanges(Replaces);
+ std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges();
StringRef FileName = Replaces.begin()->getFilePath();
tooling::Replacements FormatReplaces =
ProcessFunc(Style, *NewCode, ChangedRanges, FileName);
- return mergeReplacements(Replaces, FormatReplaces);
+ return Replaces.merge(FormatReplaces);
}
llvm::Expected<tooling::Replacements>
@@ -1441,14 +1514,31 @@ formatReplacements(StringRef Code, const tooling::Replacements &Replaces,
namespace {
inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
- return Replace.getOffset() == UINT_MAX &&
+ return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
}
-void skipComments(Lexer &Lex, Token &Tok) {
- while (Tok.is(tok::comment))
- if (Lex.LexFromRawLexer(Tok))
- return;
+inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
+ return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
+}
+
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+ StringRef FileName, StringRef Code, const FormatStyle &Style,
+ std::function<unsigned(const SourceManager &, Lexer &, Token &)>
+ GetOffsetAfterSequense) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+ const SourceManager &SourceMgr = Env->getSourceManager();
+ Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+ getFormattingLangOpts(Style));
+ Token Tok;
+ // Get the first token.
+ Lex.LexFromRawLexer(Tok);
+ return GetOffsetAfterSequense(SourceMgr, Lex, Tok);
}
// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
@@ -1464,32 +1554,90 @@ bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
return Matched;
}
+void skipComments(Lexer &Lex, Token &Tok) {
+ while (Tok.is(tok::comment))
+ if (Lex.LexFromRawLexer(Tok))
+ return;
+}
+
+// Returns the offset after header guard directives and any comments
+// before/after header guards. If no header guard presents in the code, this
+// will returns the offset after skipping all comments from the start of the
+// code.
unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
StringRef Code,
const FormatStyle &Style) {
- std::unique_ptr<Environment> Env =
- Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
- const SourceManager &SourceMgr = Env->getSourceManager();
- Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
- getFormattingLangOpts(Style));
- Token Tok;
- // Get the first token.
- Lex.LexFromRawLexer(Tok);
- skipComments(Lex, Tok);
- unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
- if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
- skipComments(Lex, Tok);
- if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
- return SourceMgr.getFileOffset(Tok.getLocation());
+ return getOffsetAfterTokenSequence(
+ FileName, Code, Style,
+ [](const SourceManager &SM, Lexer &Lex, Token Tok) {
+ skipComments(Lex, Tok);
+ unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
+ if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+ skipComments(Lex, Tok);
+ if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+ return SM.getFileOffset(Tok.getLocation());
+ }
+ return InitialOffset;
+ });
+}
+
+// Check if a sequence of tokens is like
+// "#include ("header.h" | <header.h>)".
+// If it is, \p Tok will be the token after this directive; otherwise, it can be
+// any token after the given \p Tok (including \p Tok).
+bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
+ auto Matched = [&]() {
+ Lex.LexFromRawLexer(Tok);
+ return true;
+ };
+ if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
+ if (Lex.LexFromRawLexer(Tok))
+ return false;
+ if (Tok.is(tok::string_literal))
+ return Matched();
+ if (Tok.is(tok::less)) {
+ while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
+ }
+ if (Tok.is(tok::greater))
+ return Matched();
+ }
}
- return AfterComments;
+ return false;
+}
+
+// Returns the offset of the last #include directive after which a new
+// #include can be inserted. This ignores #include's after the #include block(s)
+// in the beginning of a file to avoid inserting headers into code sections
+// where new #include's should not be added by default.
+// These code sections include:
+// - raw string literals (containing #include).
+// - #if blocks.
+// - Special #include's among declarations (e.g. functions).
+//
+// If no #include after which a new #include can be inserted, this returns the
+// offset after skipping all comments from the start of the code.
+// Inserting after an #include is not allowed if it comes after code that is not
+// #include (e.g. pre-processing directive that is not #include, declarations).
+unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
+ const FormatStyle &Style) {
+ return getOffsetAfterTokenSequence(
+ FileName, Code, Style,
+ [](const SourceManager &SM, Lexer &Lex, Token Tok) {
+ skipComments(Lex, Tok);
+ unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
+ while (checkAndConsumeInclusiveDirective(Lex, Tok))
+ MaxOffset = SM.getFileOffset(Tok.getLocation());
+ return MaxOffset;
+ });
+}
+
+bool isDeletedHeader(llvm::StringRef HeaderName,
+ const std::set<llvm::StringRef> &HeadersToDelete) {
+ return HeadersToDelete.count(HeaderName) ||
+ HeadersToDelete.count(HeaderName.trim("\"<>"));
}
-// FIXME: we also need to insert a '\n' at the end of the code if we have an
-// insertion with offset Code.size(), and there is no '\n' at the end of the
-// code.
-// FIXME: do not insert headers into conditional #include blocks, e.g. #includes
-// surrounded by compile condition "#if...".
// FIXME: insert empty lines between newly created blocks.
tooling::Replacements
fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
@@ -1498,20 +1646,25 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
return Replaces;
tooling::Replacements HeaderInsertions;
+ std::set<llvm::StringRef> HeadersToDelete;
+ tooling::Replacements Result;
for (const auto &R : Replaces) {
- if (isHeaderInsertion(R))
- HeaderInsertions.insert(R);
- else if (R.getOffset() == UINT_MAX)
+ if (isHeaderInsertion(R)) {
+ // Replacements from \p Replaces must be conflict-free already, so we can
+ // simply consume the error.
+ llvm::consumeError(HeaderInsertions.add(R));
+ } else if (isHeaderDeletion(R)) {
+ HeadersToDelete.insert(R.getReplacementText());
+ } else if (R.getOffset() == UINT_MAX) {
llvm::errs() << "Insertions other than header #include insertion are "
"not supported! "
<< R.getReplacementText() << "\n";
+ } else {
+ llvm::consumeError(Result.add(R));
+ }
}
- if (HeaderInsertions.empty())
+ if (HeaderInsertions.empty() && HeadersToDelete.empty())
return Replaces;
- tooling::Replacements Result;
- std::set_difference(Replaces.begin(), Replaces.end(),
- HeaderInsertions.begin(), HeaderInsertions.end(),
- std::inserter(Result, Result.begin()));
llvm::Regex IncludeRegex(IncludeRegexPattern);
llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
@@ -1532,6 +1685,10 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
unsigned MinInsertOffset =
getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
+ // Max insertion offset in the original code.
+ unsigned MaxInsertOffset =
+ MinInsertOffset +
+ getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style);
SmallVector<StringRef, 32> Lines;
TrimmedCode.split(Lines, '\n');
unsigned Offset = MinInsertOffset;
@@ -1540,13 +1697,30 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
for (auto Line : Lines) {
NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
if (IncludeRegex.match(Line, &Matches)) {
+ // The header name with quotes or angle brackets.
StringRef IncludeName = Matches[2];
ExistingIncludes.insert(IncludeName);
- int Category = Categories.getIncludePriority(
- IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
- CategoryEndOffsets[Category] = NextLineOffset;
- if (FirstIncludeOffset < 0)
- FirstIncludeOffset = Offset;
+ // Only record the offset of current #include if we can insert after it.
+ if (Offset <= MaxInsertOffset) {
+ int Category = Categories.getIncludePriority(
+ IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+ CategoryEndOffsets[Category] = NextLineOffset;
+ if (FirstIncludeOffset < 0)
+ FirstIncludeOffset = Offset;
+ }
+ if (isDeletedHeader(IncludeName, HeadersToDelete)) {
+ // If this is the last line without trailing newline, we need to make
+ // sure we don't delete across the file boundary.
+ unsigned Length = std::min(Line.size() + 1, Code.size() - Offset);
+ llvm::Error Err =
+ Result.add(tooling::Replacement(FileName, Offset, Length, ""));
+ if (Err) {
+ // Ignore the deletion on conflict.
+ llvm::errs() << "Failed to add header deletion replacement for "
+ << IncludeName << ": " << llvm::toString(std::move(Err))
+ << "\n";
+ }
+ }
}
Offset = NextLineOffset;
}
@@ -1570,6 +1744,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+ bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n';
for (const auto &R : HeaderInsertions) {
auto IncludeDirective = R.getReplacementText();
bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
@@ -1588,7 +1763,20 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
std::string NewInclude = !IncludeDirective.endswith("\n")
? (IncludeDirective + "\n").str()
: IncludeDirective.str();
- Result.insert(tooling::Replacement(FileName, Offset, 0, NewInclude));
+ // When inserting headers at end of the code, also append '\n' to the code
+ // if it does not end with '\n'.
+ if (NeedNewLineAtEnd && Offset == Code.size()) {
+ NewInclude = "\n" + NewInclude;
+ NeedNewLineAtEnd = false;
+ }
+ auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude);
+ auto Err = Result.add(NewReplace);
+ if (Err) {
+ llvm::consumeError(std::move(Err));
+ unsigned NewOffset = Result.getShiftedCodePosition(Offset);
+ NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude);
+ Result = Result.merge(tooling::Replacements(NewReplace));
+ }
}
return Result;
}
@@ -1611,18 +1799,6 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
return processReplacements(Cleanup, Code, NewReplaces, Style);
}
-tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM,
- FileID ID, ArrayRef<CharSourceRange> Ranges,
- bool *IncompleteFormat) {
- FormatStyle Expanded = expandPresets(Style);
- if (Expanded.DisableFormat)
- return tooling::Replacements();
-
- Environment Env(SM, ID, Ranges);
- Formatter Format(Env, Expanded, IncompleteFormat);
- return Format.process();
-}
-
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName, bool *IncompleteFormat) {
@@ -1630,19 +1806,28 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
if (Expanded.DisableFormat)
return tooling::Replacements();
- std::unique_ptr<Environment> Env =
- Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ auto Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) {
+ JavaScriptRequoter Requoter(*Env, Expanded);
+ tooling::Replacements Requotes = Requoter.process();
+ if (!Requotes.empty()) {
+ auto NewCode = applyAllReplacements(Code, Requotes);
+ if (NewCode) {
+ auto NewEnv = Environment::CreateVirtualEnvironment(
+ *NewCode, FileName,
+ tooling::calculateRangesAfterReplacements(Requotes, Ranges));
+ Formatter Format(*NewEnv, Expanded, IncompleteFormat);
+ return Requotes.merge(Format.process());
+ }
+ }
+ }
+
Formatter Format(*Env, Expanded, IncompleteFormat);
return Format.process();
}
-tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM,
- FileID ID, ArrayRef<CharSourceRange> Ranges) {
- Environment Env(SM, ID, Ranges);
- Cleaner Clean(Env, Style);
- return Clean.process();
-}
-
tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName) {
@@ -1684,6 +1869,8 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
return FormatStyle::LK_Java;
if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
+ if (FileName.endswith(".m") || FileName.endswith(".mm"))
+ return FormatStyle::LK_ObjC;
if (FileName.endswith_lower(".proto") ||
FileName.endswith_lower(".protodevel"))
return FormatStyle::LK_Proto;
@@ -1693,12 +1880,21 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
}
FormatStyle getStyle(StringRef StyleName, StringRef FileName,
- StringRef FallbackStyle, vfs::FileSystem *FS) {
+ StringRef FallbackStyle, StringRef Code,
+ vfs::FileSystem *FS) {
if (!FS) {
FS = vfs::getRealFileSystem().get();
}
FormatStyle Style = getLLVMStyle();
Style.Language = getLanguageByFileName(FileName);
+
+ // This is a very crude detection of whether a header contains ObjC code that
+ // should be improved over time and probably be done on tokens, not one the
+ // bare content of the file.
+ if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
+ (Code.contains("\n- (") || Code.contains("\n+ (")))
+ Style.Language = FormatStyle::LK_ObjC;
+
if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
llvm::errs() << "Invalid fallback style \"" << FallbackStyle
<< "\" using LLVM style\n";
@@ -1724,7 +1920,11 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName,
// Look for .clang-format/_clang-format file in the file's parent directories.
SmallString<128> UnsuitableConfigFiles;
SmallString<128> Path(FileName);
- llvm::sys::fs::make_absolute(Path);
+ if (std::error_code EC = FS->makeAbsolute(Path)) {
+ llvm::errs() << EC.message() << "\n";
+ return Style;
+ }
+
for (StringRef Directory = Path; !Directory.empty();
Directory = llvm::sys::path::parent_path(Directory)) {