summaryrefslogtreecommitdiff
path: root/clang/lib/Tooling/Transformer
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Tooling/Transformer')
-rw-r--r--clang/lib/Tooling/Transformer/Parsing.cpp279
-rw-r--r--clang/lib/Tooling/Transformer/RangeSelector.cpp9
-rw-r--r--clang/lib/Tooling/Transformer/RewriteRule.cpp68
-rw-r--r--clang/lib/Tooling/Transformer/SourceCode.cpp370
-rw-r--r--clang/lib/Tooling/Transformer/Stencil.cpp66
-rw-r--r--clang/lib/Tooling/Transformer/Transformer.cpp44
6 files changed, 754 insertions, 82 deletions
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
new file mode 100644
index 0000000000000..1579115b93138
--- /dev/null
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -0,0 +1,279 @@
+//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Transformer/Parsing.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "clang/Tooling/Transformer/SourceCode.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace transformer;
+
+// FIXME: This implementation is entirely separate from that of the AST
+// matchers. Given the similarity of the languages and uses of the two parsers,
+// the two should share a common parsing infrastructure, as should other
+// Transformer types. We intend to unify this implementation soon to share as
+// much as possible with the AST Matchers parsing.
+
+namespace {
+using llvm::Error;
+using llvm::Expected;
+
+template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
+
+struct ParseState {
+ // The remaining input to be processed.
+ StringRef Input;
+ // The original input. Not modified during parsing; only for reference in
+ // error reporting.
+ StringRef OriginalInput;
+};
+
+// Represents an intermediate result returned by a parsing function. Functions
+// that don't generate values should use `llvm::None`
+template <typename ResultType> struct ParseProgress {
+ ParseState State;
+ // Intermediate result generated by the Parser.
+ ResultType Value;
+};
+
+template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
+template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
+
+class ParseError : public llvm::ErrorInfo<ParseError> {
+public:
+ // Required field for all ErrorInfo derivatives.
+ static char ID;
+
+ ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
+ : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
+ Excerpt(std::move(InputExcerpt)) {}
+
+ void log(llvm::raw_ostream &OS) const override {
+ OS << "parse error at position (" << Pos << "): " << ErrorMsg
+ << ": " + Excerpt;
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+ // Position of the error in the input string.
+ size_t Pos;
+ std::string ErrorMsg;
+ // Excerpt of the input starting at the error position.
+ std::string Excerpt;
+};
+
+char ParseError::ID;
+} // namespace
+
+static const llvm::StringMap<RangeSelectorOp<std::string>> &
+getUnaryStringSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
+ {"name", name},
+ {"node", node},
+ {"statement", statement},
+ {"statements", statements},
+ {"member", member},
+ {"callArgs", callArgs},
+ {"elseBranch", elseBranch},
+ {"initListElements", initListElements}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
+getUnaryRangeSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
+ {"before", before}, {"after", after}, {"expansion", expansion}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
+getBinaryStringSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
+ {"encloseNodes", range}};
+ return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
+getBinaryRangeSelectors() {
+ static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
+ M = {{"enclose", range}};
+ return M;
+}
+
+template <typename Element>
+llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
+ llvm::StringRef Key) {
+ auto it = Map.find(Key);
+ if (it == Map.end())
+ return llvm::None;
+ return it->second;
+}
+
+template <typename ResultType>
+ParseProgress<ResultType> makeParseProgress(ParseState State,
+ ResultType Result) {
+ return ParseProgress<ResultType>{State, std::move(Result)};
+}
+
+static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
+ size_t Pos = S.OriginalInput.size() - S.Input.size();
+ return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
+ S.OriginalInput.substr(Pos, 20).str());
+}
+
+// Returns a new ParseState that advances \c S by \c N characters.
+static ParseState advance(ParseState S, size_t N) {
+ S.Input = S.Input.drop_front(N);
+ return S;
+}
+
+static StringRef consumeWhitespace(StringRef S) {
+ return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
+}
+
+// Parses a single expected character \c c from \c State, skipping preceding
+// whitespace. Error if the expected character isn't found.
+static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty() || State.Input.front() != c)
+ return makeParseError(State,
+ ("expected char not found: " + llvm::Twine(c)).str());
+ return makeParseProgress(advance(State, 1), llvm::None);
+}
+
+// Parses an identitifer "token" -- handles preceding whitespace.
+static ExpectedProgress<std::string> parseId(ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ auto Id = State.Input.take_while(
+ [](char c) { return c >= 0 && isIdentifierBody(c); });
+ if (Id.empty())
+ return makeParseError(State, "failed to parse name");
+ return makeParseProgress(advance(State, Id.size()), Id.str());
+}
+
+// For consistency with the AST matcher parser and C++ code, node ids are
+// written as strings. However, we do not support escaping in the string.
+static ExpectedProgress<std::string> parseStringId(ParseState State) {
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty())
+ return makeParseError(State, "unexpected end of input");
+ if (!State.Input.consume_front("\""))
+ return makeParseError(
+ State,
+ "expecting string, but encountered other character or end of input");
+
+ StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
+ if (State.Input.size() == Id.size())
+ return makeParseError(State, "unterminated string");
+ // Advance past the trailing quote as well.
+ return makeParseProgress(advance(State, Id.size() + 1), Id.str());
+}
+
+// Parses a single element surrounded by parens. `Op` is applied to the parsed
+// result to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
+ RangeSelectorOp<T> Op,
+ ParseState State) {
+ auto P = parseChar('(', State);
+ if (!P)
+ return P.takeError();
+
+ auto E = ParseElement(P->State);
+ if (!E)
+ return E.takeError();
+
+ P = parseChar(')', E->State);
+ if (!P)
+ return P.takeError();
+
+ return makeParseProgress(P->State, Op(std::move(E->Value)));
+}
+
+// Parses a pair of elements surrounded by parens and separated by comma. `Op`
+// is applied to the parsed results to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
+ RangeSelectorOp<T, T> Op,
+ ParseState State) {
+ auto P = parseChar('(', State);
+ if (!P)
+ return P.takeError();
+
+ auto Left = ParseElement(P->State);
+ if (!Left)
+ return Left.takeError();
+
+ P = parseChar(',', Left->State);
+ if (!P)
+ return P.takeError();
+
+ auto Right = ParseElement(P->State);
+ if (!Right)
+ return Right.takeError();
+
+ P = parseChar(')', Right->State);
+ if (!P)
+ return P.takeError();
+
+ return makeParseProgress(P->State,
+ Op(std::move(Left->Value), std::move(Right->Value)));
+}
+
+// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
+// Id operator). Returns StencilType representing the operator on success and
+// error if it fails to parse input for an operator.
+static ExpectedProgress<RangeSelector>
+parseRangeSelectorImpl(ParseState State) {
+ auto Id = parseId(State);
+ if (!Id)
+ return Id.takeError();
+
+ std::string OpName = std::move(Id->Value);
+ if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
+ return parseSingle(parseStringId, *Op, Id->State);
+
+ if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
+ return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
+
+ if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
+ return parsePair(parseStringId, *Op, Id->State);
+
+ if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
+ return parsePair(parseRangeSelectorImpl, *Op, Id->State);
+
+ return makeParseError(State, "unknown selector name: " + OpName);
+}
+
+Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
+ ParseState State = {Input, Input};
+ ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
+ if (!Result)
+ return Result.takeError();
+ State = Result->State;
+ // Discard any potentially trailing whitespace.
+ State.Input = consumeWhitespace(State.Input);
+ if (State.Input.empty())
+ return Result->Value;
+ return makeParseError(State, "unexpected input after selector");
+}
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 9f81423c9022b..29b1a5b0372ea 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -23,8 +23,6 @@ using namespace clang;
using namespace transformer;
using ast_matchers::MatchFinder;
-using ast_type_traits::ASTNodeKind;
-using ast_type_traits::DynTypedNode;
using llvm::Error;
using llvm::StringError;
@@ -148,7 +146,7 @@ RangeSelector transformer::statement(std::string ID) {
};
}
-RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) {
+RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
Expected<CharSourceRange> BeginRange = Begin(Result);
if (!BeginRange)
@@ -167,8 +165,9 @@ RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) {
};
}
-RangeSelector transformer::range(std::string BeginID, std::string EndID) {
- return transformer::range(node(std::move(BeginID)), node(std::move(EndID)));
+RangeSelector transformer::encloseNodes(std::string BeginID,
+ std::string EndID) {
+ return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
}
RangeSelector transformer::member(std::string ID) {
diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp
index 20d3a371950af..995bec03cd669 100644
--- a/clang/lib/Tooling/Transformer/RewriteRule.cpp
+++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp
@@ -25,16 +25,14 @@ using namespace transformer;
using ast_matchers::MatchFinder;
using ast_matchers::internal::DynTypedMatcher;
-using ast_type_traits::ASTNodeKind;
using MatchResult = MatchFinder::MatchResult;
-Expected<SmallVector<transformer::detail::Transformation, 1>>
-transformer::detail::translateEdits(const MatchResult &Result,
- llvm::ArrayRef<ASTEdit> Edits) {
- SmallVector<transformer::detail::Transformation, 1> Transformations;
- for (const auto &Edit : Edits) {
- Expected<CharSourceRange> Range = Edit.TargetRange(Result);
+static Expected<SmallVector<transformer::Edit, 1>>
+translateEdits(const MatchResult &Result, ArrayRef<ASTEdit> ASTEdits) {
+ SmallVector<transformer::Edit, 1> Edits;
+ for (const auto &E : ASTEdits) {
+ Expected<CharSourceRange> Range = E.TargetRange(Result);
if (!Range)
return Range.takeError();
llvm::Optional<CharSourceRange> EditRange =
@@ -42,21 +40,34 @@ transformer::detail::translateEdits(const MatchResult &Result,
// FIXME: let user specify whether to treat this case as an error or ignore
// it as is currently done.
if (!EditRange)
- return SmallVector<Transformation, 0>();
- auto Replacement = Edit.Replacement->eval(Result);
+ return SmallVector<Edit, 0>();
+ auto Replacement = E.Replacement->eval(Result);
if (!Replacement)
return Replacement.takeError();
- transformer::detail::Transformation T;
+ transformer::Edit T;
T.Range = *EditRange;
T.Replacement = std::move(*Replacement);
- Transformations.push_back(std::move(T));
+ T.Metadata = E.Metadata;
+ Edits.push_back(std::move(T));
}
- return Transformations;
+ return Edits;
}
-ASTEdit transformer::changeTo(RangeSelector S, TextGenerator Replacement) {
+EditGenerator transformer::editList(SmallVector<ASTEdit, 1> Edits) {
+ return [Edits = std::move(Edits)](const MatchResult &Result) {
+ return translateEdits(Result, Edits);
+ };
+}
+
+EditGenerator transformer::edit(ASTEdit Edit) {
+ return [Edit = std::move(Edit)](const MatchResult &Result) {
+ return translateEdits(Result, {Edit});
+ };
+}
+
+ASTEdit transformer::changeTo(RangeSelector Target, TextGenerator Replacement) {
ASTEdit E;
- E.TargetRange = std::move(S);
+ E.TargetRange = std::move(Target);
E.Replacement = std::move(Replacement);
return E;
}
@@ -83,8 +94,9 @@ ASTEdit transformer::remove(RangeSelector S) {
return change(std::move(S), std::make_shared<SimpleTextGenerator>(""));
}
-RewriteRule transformer::makeRule(DynTypedMatcher M, SmallVector<ASTEdit, 1> Edits,
- TextGenerator Explanation) {
+RewriteRule transformer::makeRule(ast_matchers::internal::DynTypedMatcher M,
+ EditGenerator Edits,
+ TextGenerator Explanation) {
return RewriteRule{{RewriteRule::Case{
std::move(M), std::move(Edits), std::move(Explanation), {}}}};
}
@@ -105,10 +117,13 @@ static bool hasValidKind(const DynTypedMatcher &M) {
#endif
// Binds each rule's matcher to a unique (and deterministic) tag based on
-// `TagBase` and the id paired with the case.
+// `TagBase` and the id paired with the case. All of the returned matchers have
+// their traversal kind explicitly set, either based on a pre-set kind or to the
+// provided `DefaultTraversalKind`.
static std::vector<DynTypedMatcher> taggedMatchers(
StringRef TagBase,
- const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases) {
+ const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases,
+ ast_type_traits::TraversalKind DefaultTraversalKind) {
std::vector<DynTypedMatcher> Matchers;
Matchers.reserve(Cases.size());
for (const auto &Case : Cases) {
@@ -116,8 +131,10 @@ static std::vector<DynTypedMatcher> taggedMatchers(
// HACK: Many matchers are not bindable, so ensure that tryBind will work.
DynTypedMatcher BoundMatcher(Case.second.Matcher);
BoundMatcher.setAllowBind(true);
- auto M = BoundMatcher.tryBind(Tag);
- Matchers.push_back(*std::move(M));
+ auto M = *BoundMatcher.tryBind(Tag);
+ Matchers.push_back(!M.getTraversalKind()
+ ? M.withTraversalKind(DefaultTraversalKind)
+ : std::move(M));
}
return Matchers;
}
@@ -147,14 +164,21 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) {
Buckets[Cases[I].Matcher.getSupportedKind()].emplace_back(I, Cases[I]);
}
+ // Each anyOf explicitly controls the traversal kind. The anyOf itself is set
+ // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind
+ // of the branches. Then, each branch is either left as is, if the kind is
+ // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. We
+ // choose this setting, because we think it is the one most friendly to
+ // beginners, who are (largely) the target audience of Transformer.
std::vector<DynTypedMatcher> Matchers;
for (const auto &Bucket : Buckets) {
DynTypedMatcher M = DynTypedMatcher::constructVariadic(
DynTypedMatcher::VO_AnyOf, Bucket.first,
- taggedMatchers("Tag", Bucket.second));
+ taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource));
M.setAllowBind(true);
// `tryBind` is guaranteed to succeed, because `AllowBind` was set to true.
- Matchers.push_back(*M.tryBind(RewriteRule::RootID));
+ Matchers.push_back(
+ M.tryBind(RewriteRule::RootID)->withTraversalKind(TK_AsIs));
}
return Matchers;
}
diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 836401d1e605c..26b204851f058 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -10,10 +10,24 @@
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Transformer/SourceCode.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <set>
using namespace clang;
+using llvm::errc;
+using llvm::StringError;
+
StringRef clang::tooling::getText(CharSourceRange Range,
const ASTContext &Context) {
return Lexer::getSourceText(Range, Context.getSourceManager(),
@@ -23,11 +37,45 @@ StringRef clang::tooling::getText(CharSourceRange Range,
CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
tok::TokenKind Next,
ASTContext &Context) {
- Optional<Token> Tok = Lexer::findNextToken(
- Range.getEnd(), Context.getSourceManager(), Context.getLangOpts());
- if (!Tok || !Tok->is(Next))
+ CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(),
+ Context.getLangOpts());
+ if (R.isInvalid())
+ return Range;
+ Token Tok;
+ bool Err =
+ Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(),
+ Context.getLangOpts(), /*IgnoreWhiteSpace=*/true);
+ if (Err || !Tok.is(Next))
return Range;
- return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation());
+ return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation());
+}
+
+llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
+ const SourceManager &SM) {
+ if (Range.isInvalid())
+ return llvm::make_error<StringError>(errc::invalid_argument,
+ "Invalid range");
+
+ if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+ return llvm::make_error<StringError>(
+ errc::invalid_argument, "Range starts or ends in a macro expansion");
+
+ if (SM.isInSystemHeader(Range.getBegin()) ||
+ SM.isInSystemHeader(Range.getEnd()))
+ return llvm::make_error<StringError>(errc::invalid_argument,
+ "Range is in system header");
+
+ std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
+ std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
+ if (BeginInfo.first != EndInfo.first)
+ return llvm::make_error<StringError>(
+ errc::invalid_argument, "Range begins and ends in different files");
+
+ if (BeginInfo.second > EndInfo.second)
+ return llvm::make_error<StringError>(
+ errc::invalid_argument, "Range's begin is past its end");
+
+ return llvm::Error::success();
}
llvm::Optional<CharSourceRange>
@@ -46,20 +94,308 @@ clang::tooling::getRangeForEdit(const CharSourceRange &EditRange,
// foo(DO_NOTHING(6))
// Decide whether the current behavior is desirable and modify if not.
CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
- if (Range.isInvalid())
- return None;
+ bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM));
+ if (IsInvalid)
+ return llvm::None;
+ return Range;
- if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
- return None;
- if (SM.isInSystemHeader(Range.getBegin()) ||
- SM.isInSystemHeader(Range.getEnd()))
- return None;
+}
- std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
- std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
- if (BeginInfo.first != EndInfo.first ||
- BeginInfo.second > EndInfo.second)
- return None;
+static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
+ return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
+}
- return Range;
+static bool contains(const std::set<tok::TokenKind> &Terminators,
+ const Token &Tok) {
+ return Terminators.count(Tok.getKind()) > 0;
+}
+
+// Returns the exclusive, *file* end location of the entity whose last token is
+// at location 'EntityLast'. That is, it returns the location one past the last
+// relevant character.
+//
+// Associated tokens include comments, horizontal whitespace and 'Terminators'
+// -- optional tokens, which, if any are found, will be included; if
+// 'Terminators' is empty, we will not include any extra tokens beyond comments
+// and horizontal whitespace.
+static SourceLocation
+getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
+ const std::set<tok::TokenKind> &Terminators,
+ const LangOptions &LangOpts) {
+ assert(EntityLast.isValid() && "Invalid end location found.");
+
+ // We remember the last location of a non-horizontal-whitespace token we have
+ // lexed; this is the location up to which we will want to delete.
+ // FIXME: Support using the spelling loc here for cases where we want to
+ // analyze the macro text.
+
+ CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
+ // FIXME: Should check isTokenRange(), for the (rare) case that
+ // `ExpansionRange` is a character range.
+ std::unique_ptr<Lexer> Lexer = [&]() {
+ bool Invalid = false;
+ auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
+ llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
+ assert(!Invalid && "Cannot get file/offset");
+ return std::make_unique<clang::Lexer>(
+ SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
+ File.data() + FileOffset.second, File.end());
+ }();
+
+ // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
+ Lexer->SetKeepWhitespaceMode(true);
+
+ // Generally, the code we want to include looks like this ([] are optional),
+ // If Terminators is empty:
+ // [ <comment> ] [ <newline> ]
+ // Otherwise:
+ // ... <terminator> [ <comment> ] [ <newline> ]
+
+ Token Tok;
+ bool Terminated = false;
+
+ // First, lex to the current token (which is the last token of the range that
+ // is definitely associated with the decl). Then, we process the first token
+ // separately from the rest based on conditions that hold specifically for
+ // that first token.
+ //
+ // We do not search for a terminator if none is required or we've already
+ // encountered it. Otherwise, if the original `EntityLast` location was in a
+ // macro expansion, we don't have visibility into the text, so we assume we've
+ // already terminated. However, we note this assumption with
+ // `TerminatedByMacro`, because we'll want to handle it somewhat differently
+ // for the terminators semicolon and comma. These terminators can be safely
+ // associated with the entity when they appear after the macro -- extra
+ // semicolons have no effect on the program and a well-formed program won't
+ // have multiple commas in a row, so we're guaranteed that there is only one.
+ //
+ // FIXME: This handling of macros is more conservative than necessary. When
+ // the end of the expansion coincides with the end of the node, we can still
+ // safely analyze the code. But, it is more complicated, because we need to
+ // start by lexing the spelling loc for the first token and then switch to the
+ // expansion loc.
+ bool TerminatedByMacro = false;
+ Lexer->LexFromRawLexer(Tok);
+ if (Terminators.empty() || contains(Terminators, Tok))
+ Terminated = true;
+ else if (EntityLast.isMacroID()) {
+ Terminated = true;
+ TerminatedByMacro = true;
+ }
+
+ // We save the most recent candidate for the exclusive end location.
+ SourceLocation End = Tok.getEndLoc();
+
+ while (!Terminated) {
+ // Lex the next token we want to possibly expand the range with.
+ Lexer->LexFromRawLexer(Tok);
+
+ switch (Tok.getKind()) {
+ case tok::eof:
+ // Unexpected separators.
+ case tok::l_brace:
+ case tok::r_brace:
+ case tok::comma:
+ return End;
+ // Whitespace pseudo-tokens.
+ case tok::unknown:
+ if (startsWithNewline(SM, Tok))
+ // Include at least until the end of the line.
+ End = Tok.getEndLoc();
+ break;
+ default:
+ if (contains(Terminators, Tok))
+ Terminated = true;
+ End = Tok.getEndLoc();
+ break;
+ }
+ }
+
+ do {
+ // Lex the next token we want to possibly expand the range with.
+ Lexer->LexFromRawLexer(Tok);
+
+ switch (Tok.getKind()) {
+ case tok::unknown:
+ if (startsWithNewline(SM, Tok))
+ // We're done, but include this newline.
+ return Tok.getEndLoc();
+ break;
+ case tok::comment:
+ // Include any comments we find on the way.
+ End = Tok.getEndLoc();
+ break;
+ case tok::semi:
+ case tok::comma:
+ if (TerminatedByMacro && contains(Terminators, Tok)) {
+ End = Tok.getEndLoc();
+ // We've found a real terminator.
+ TerminatedByMacro = false;
+ break;
+ }
+ // Found an unrelated token; stop and don't include it.
+ return End;
+ default:
+ // Found an unrelated token; stop and don't include it.
+ return End;
+ }
+ } while (true);
+}
+
+// Returns the expected terminator tokens for the given declaration.
+//
+// If we do not know the correct terminator token, returns an empty set.
+//
+// There are cases where we have more than one possible terminator (for example,
+// we find either a comma or a semicolon after a VarDecl).
+static std::set<tok::TokenKind> getTerminators(const Decl &D) {
+ if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
+ return {tok::semi};
+
+ if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
+ return {tok::r_brace, tok::semi};
+
+ if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
+ return {tok::comma, tok::semi};
+
+ return {};
+}
+
+// Starting from `Loc`, skips whitespace up to, and including, a single
+// newline. Returns the (exclusive) end of any skipped whitespace (that is, the
+// location immediately after the whitespace).
+static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
+ SourceLocation Loc,
+ const LangOptions &LangOpts) {
+ const char *LocChars = SM.getCharacterData(Loc);
+ int i = 0;
+ while (isHorizontalWhitespace(LocChars[i]))
+ ++i;
+ if (isVerticalWhitespace(LocChars[i]))
+ ++i;
+ return Loc.getLocWithOffset(i);
+}
+
+// Is `Loc` separated from any following decl by something meaningful (e.g. an
+// empty line, a comment), ignoring horizontal whitespace? Since this is a
+// heuristic, we return false when in doubt. `Loc` cannot be the first location
+// in the file.
+static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
+ const LangOptions &LangOpts) {
+ // If the preceding character is a newline, we'll check for an empty line as a
+ // separator. However, we can't identify an empty line using tokens, so we
+ // analyse the characters. If we try to use tokens, we'll just end up with a
+ // whitespace token, whose characters we'd have to analyse anyhow.
+ bool Invalid = false;
+ const char *LocChars =
+ SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
+ assert(!Invalid &&
+ "Loc must be a valid character and not the first of the source file.");
+ if (isVerticalWhitespace(LocChars[0])) {
+ for (int i = 1; isWhitespace(LocChars[i]); ++i)
+ if (isVerticalWhitespace(LocChars[i]))
+ return true;
+ }
+ // We didn't find an empty line, so lex the next token, skipping past any
+ // whitespace we just scanned.
+ Token Tok;
+ bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
+ /*IgnoreWhiteSpace=*/true);
+ if (Failed)
+ // Any text that confuses the lexer seems fair to consider a separation.
+ return true;
+
+ switch (Tok.getKind()) {
+ case tok::comment:
+ case tok::l_brace:
+ case tok::r_brace:
+ case tok::eof:
+ return true;
+ default:
+ return false;
+ }
+}
+
+CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
+ ASTContext &Context) {
+ const SourceManager &SM = Context.getSourceManager();
+ const LangOptions &LangOpts = Context.getLangOpts();
+ CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
+
+ // First, expand to the start of the template<> declaration if necessary.
+ if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
+ if (const auto *T = Record->getDescribedClassTemplate())
+ if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+ Range.setBegin(T->getBeginLoc());
+ } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
+ if (const auto *T = F->getDescribedFunctionTemplate())
+ if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+ Range.setBegin(T->getBeginLoc());
+ }
+
+ // Next, expand the end location past trailing comments to include a potential
+ // newline at the end of the decl's line.
+ Range.setEnd(
+ getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
+ Range.setTokenRange(false);
+
+ // Expand to include preceeding associated comments. We ignore any comments
+ // that are not preceeding the decl, since we've already skipped trailing
+ // comments with getEntityEndLoc.
+ if (const RawComment *Comment =
+ Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
+ // Only include a preceding comment if:
+ // * it is *not* separate from the declaration (not including any newline
+ // that immediately follows the comment),
+ // * the decl *is* separate from any following entity (so, there are no
+ // other entities the comment could refer to), and
+ // * it is not a IfThisThenThat lint check.
+ if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
+ Range.getBegin()) &&
+ !atOrBeforeSeparation(
+ SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
+ LangOpts) &&
+ atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
+ const StringRef CommentText = Comment->getRawText(SM);
+ if (!CommentText.contains("LINT.IfChange") &&
+ !CommentText.contains("LINT.ThenChange"))
+ Range.setBegin(Comment->getBeginLoc());
+ }
+ // Add leading attributes.
+ for (auto *Attr : Decl.attrs()) {
+ if (Attr->getLocation().isInvalid() ||
+ !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
+ continue;
+ Range.setBegin(Attr->getLocation());
+
+ // Extend to the left '[[' or '__attribute((' if we saw the attribute,
+ // unless it is not a valid location.
+ bool Invalid;
+ StringRef Source =
+ SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
+ if (Invalid)
+ continue;
+ llvm::StringRef BeforeAttr =
+ Source.substr(0, SM.getFileOffset(Range.getBegin()));
+ llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
+
+ for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
+ // Handle whitespace between attribute prefix and attribute value.
+ if (BeforeAttrStripped.endswith(Prefix)) {
+ // Move start to start position of prefix, which is
+ // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
+ // positions to the left.
+ Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
+ -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
+ break;
+ // If we didn't see '[[' or '__attribute' it's probably coming from a
+ // macro expansion which is already handled by makeFileCharRange(),
+ // below.
+ }
+ }
+ }
+
+ // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
+ // Range.getBegin() may be inside an expansion.
+ return Lexer::makeFileCharRange(Range, SM, LangOpts);
}
diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp
index 8710e3cdf60f3..2670bf7adabf3 100644
--- a/clang/lib/Tooling/Transformer/Stencil.cpp
+++ b/clang/lib/Tooling/Transformer/Stencil.cpp
@@ -12,12 +12,14 @@
#include "clang/AST/Expr.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Transformer/SourceCode.h"
#include "clang/Tooling/Transformer/SourceCodeBuilders.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
#include <atomic>
#include <memory>
#include <string>
@@ -26,7 +28,6 @@ using namespace clang;
using namespace transformer;
using ast_matchers::MatchFinder;
-using ast_type_traits::DynTypedNode;
using llvm::errc;
using llvm::Error;
using llvm::Expected;
@@ -81,14 +82,14 @@ struct SelectorData {
// A stencil operation to build a member access `e.m` or `e->m`, as appropriate.
struct AccessData {
AccessData(StringRef BaseId, Stencil Member)
- : BaseId(BaseId), Member(std::move(Member)) {}
+ : BaseId(std::string(BaseId)), Member(std::move(Member)) {}
std::string BaseId;
Stencil Member;
};
struct IfBoundData {
IfBoundData(StringRef Id, Stencil TrueStencil, Stencil FalseStencil)
- : Id(Id), TrueStencil(std::move(TrueStencil)),
+ : Id(std::string(Id)), TrueStencil(std::move(TrueStencil)),
FalseStencil(std::move(FalseStencil)) {}
std::string Id;
Stencil TrueStencil;
@@ -227,10 +228,37 @@ Error evalData(const UnaryOperationData &Data,
Error evalData(const SelectorData &Data, const MatchFinder::MatchResult &Match,
std::string *Result) {
- auto Range = Data.Selector(Match);
- if (!Range)
- return Range.takeError();
- *Result += tooling::getText(*Range, *Match.Context);
+ auto RawRange = Data.Selector(Match);
+ if (!RawRange)
+ return RawRange.takeError();
+ CharSourceRange Range = Lexer::makeFileCharRange(
+ *RawRange, *Match.SourceManager, Match.Context->getLangOpts());
+ if (Range.isInvalid()) {
+ // Validate the original range to attempt to get a meaningful error message.
+ // If it's valid, then something else is the cause and we just return the
+ // generic failure message.
+ if (auto Err = tooling::validateEditRange(*RawRange, *Match.SourceManager))
+ return handleErrors(std::move(Err), [](std::unique_ptr<StringError> E) {
+ assert(E->convertToErrorCode() ==
+ llvm::make_error_code(errc::invalid_argument) &&
+ "Validation errors must carry the invalid_argument code");
+ return llvm::createStringError(
+ errc::invalid_argument,
+ "selected range could not be resolved to a valid source range; " +
+ E->getMessage());
+ });
+ return llvm::createStringError(
+ errc::invalid_argument,
+ "selected range could not be resolved to a valid source range");
+ }
+ // Validate `Range`, because `makeFileCharRange` accepts some ranges that
+ // `validateEditRange` rejects.
+ if (auto Err = tooling::validateEditRange(Range, *Match.SourceManager))
+ return joinErrors(
+ llvm::createStringError(errc::invalid_argument,
+ "selected range is not valid for editing"),
+ std::move(Err));
+ *Result += tooling::getText(Range, *Match.Context);
return Error::success();
}
@@ -294,47 +322,41 @@ public:
};
} // namespace
-Stencil transformer::detail::makeStencil(StringRef Text) { return text(Text); }
-
-Stencil transformer::detail::makeStencil(RangeSelector Selector) {
- return selection(std::move(Selector));
+Stencil transformer::detail::makeStencil(StringRef Text) {
+ return std::make_shared<StencilImpl<RawTextData>>(std::string(Text));
}
-Stencil transformer::text(StringRef Text) {
- return std::make_shared<StencilImpl<RawTextData>>(Text);
-}
-
-Stencil transformer::selection(RangeSelector Selector) {
+Stencil transformer::detail::makeStencil(RangeSelector Selector) {
return std::make_shared<StencilImpl<SelectorData>>(std::move(Selector));
}
Stencil transformer::dPrint(StringRef Id) {
- return std::make_shared<StencilImpl<DebugPrintNodeData>>(Id);
+ return std::make_shared<StencilImpl<DebugPrintNodeData>>(std::string(Id));
}
Stencil transformer::expression(llvm::StringRef Id) {
return std::make_shared<StencilImpl<UnaryOperationData>>(
- UnaryNodeOperator::Parens, Id);
+ UnaryNodeOperator::Parens, std::string(Id));
}
Stencil transformer::deref(llvm::StringRef ExprId) {
return std::make_shared<StencilImpl<UnaryOperationData>>(
- UnaryNodeOperator::Deref, ExprId);
+ UnaryNodeOperator::Deref, std::string(ExprId));
}
Stencil transformer::maybeDeref(llvm::StringRef ExprId) {
return std::make_shared<StencilImpl<UnaryOperationData>>(
- UnaryNodeOperator::MaybeDeref, ExprId);
+ UnaryNodeOperator::MaybeDeref, std::string(ExprId));
}
Stencil transformer::addressOf(llvm::StringRef ExprId) {
return std::make_shared<StencilImpl<UnaryOperationData>>(
- UnaryNodeOperator::AddressOf, ExprId);
+ UnaryNodeOperator::AddressOf, std::string(ExprId));
}
Stencil transformer::maybeAddressOf(llvm::StringRef ExprId) {
return std::make_shared<StencilImpl<UnaryOperationData>>(
- UnaryNodeOperator::MaybeAddressOf, ExprId);
+ UnaryNodeOperator::MaybeAddressOf, std::string(ExprId));
}
Stencil transformer::access(StringRef BaseId, Stencil Member) {
diff --git a/clang/lib/Tooling/Transformer/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp
index 71f0646f4c0eb..e8fc00c4e953f 100644
--- a/clang/lib/Tooling/Transformer/Transformer.cpp
+++ b/clang/lib/Tooling/Transformer/Transformer.cpp
@@ -12,6 +12,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Tooling/Refactoring/AtomicChange.h"
#include "llvm/Support/Error.h"
+#include <map>
#include <utility>
#include <vector>
@@ -31,7 +32,7 @@ void Transformer::run(const MatchFinder::MatchResult &Result) {
transformer::RewriteRule::Case Case =
transformer::detail::findSelectedCase(Result, Rule);
- auto Transformations = transformer::detail::translateEdits(Result, Case.Edits);
+ auto Transformations = Case.Edits(Result);
if (!Transformations) {
Consumer(Transformations.takeError());
return;
@@ -45,28 +46,39 @@ void Transformer::run(const MatchFinder::MatchResult &Result) {
return;
}
- // Record the results in the AtomicChange, anchored at the location of the
- // first change.
- AtomicChange AC(*Result.SourceManager,
- (*Transformations)[0].Range.getBegin());
+ // Group the transformations, by file, into AtomicChanges, each anchored by
+ // the location of the first change in that file.
+ std::map<FileID, AtomicChange> ChangesByFileID;
for (const auto &T : *Transformations) {
+ auto ID = Result.SourceManager->getFileID(T.Range.getBegin());
+ auto Iter = ChangesByFileID
+ .emplace(ID, AtomicChange(*Result.SourceManager,
+ T.Range.getBegin(), T.Metadata))
+ .first;
+ auto &AC = Iter->second;
if (auto Err = AC.replace(*Result.SourceManager, T.Range, T.Replacement)) {
Consumer(std::move(Err));
return;
}
}
- for (const auto &I : Case.AddedIncludes) {
- auto &Header = I.first;
- switch (I.second) {
- case transformer::IncludeFormat::Quoted:
- AC.addHeader(Header);
- break;
- case transformer::IncludeFormat::Angled:
- AC.addHeader((llvm::Twine("<") + Header + ">").str());
- break;
+ for (auto &IDChangePair : ChangesByFileID) {
+ auto &AC = IDChangePair.second;
+ // FIXME: this will add includes to *all* changed files, which may not be
+ // the intent. We should upgrade the representation to allow associating
+ // headers with specific edits.
+ for (const auto &I : Case.AddedIncludes) {
+ auto &Header = I.first;
+ switch (I.second) {
+ case transformer::IncludeFormat::Quoted:
+ AC.addHeader(Header);
+ break;
+ case transformer::IncludeFormat::Angled:
+ AC.addHeader((llvm::Twine("<") + Header + ">").str());
+ break;
+ }
}
- }
- Consumer(std::move(AC));
+ Consumer(std::move(AC));
+ }
}