6 files changed, 754 insertions, 82 deletions
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
new file mode 100644
index 0000000000000..1579115b93138
--- /dev/null
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -0,0 +1,279 @@
+//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Transformer/Parsing.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "clang/Tooling/Transformer/SourceCode.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace transformer;
+
+// FIXME: This implementation is entirely separate from that of the AST
+// matchers. Given the similarity of the languages and uses of the two parsers,
+// the two should share a common parsing infrastructure, as should other
+// Transformer types. We intend to unify this implementation soon to share as
+// much as possible with the AST Matchers parsing.
+
+namespace {
+using llvm::Error;
+using llvm::Expected;
+
+template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
+
+struct ParseState {
+  // The remaining input to be processed.
+  StringRef Input;
+  // The original input. Not modified during parsing; only for reference in
+  // error reporting.
+  StringRef OriginalInput;
+};
+
+// Represents an intermediate result returned by a parsing function. Functions
+// that don't generate values use `llvm::NoneType` (value `llvm::None`).
+template <typename ResultType> struct ParseProgress {
+  ParseState State; // Parse state after this step.
+  // Intermediate result generated by the parsing function.
+  ResultType Value;
+};
+
+template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
+template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
+
+class ParseError : public llvm::ErrorInfo<ParseError> {
+public:
+  // Required field for all ErrorInfo derivatives.
+  static char ID;
+
+  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
+      : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
+        Excerpt(std::move(InputExcerpt)) {}
+
+  void log(llvm::raw_ostream &OS) const override {
+    OS << "parse error at position (" << Pos << "): " << ErrorMsg << ": "
+       << Excerpt; // Streamed directly; no temporary std::string is built.
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+
+  // Position of the error in the input string.
+  size_t Pos;
+  std::string ErrorMsg; // Description of what went wrong.
+  // Excerpt of the input starting at the error position.
+  std::string Excerpt;
+};
+
+char ParseError::ID;
+} // namespace
+
+static const llvm::StringMap<RangeSelectorOp<std::string>> &
+getUnaryStringSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
+      {"name", name},
+      {"node", node},
+      {"statement", statement},
+      {"statements", statements},
+      {"member", member},
+      {"callArgs", callArgs},
+      {"elseBranch", elseBranch},
+      {"initListElements", initListElements}};
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
+getUnaryRangeSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
+      {"before", before}, {"after", after}, {"expansion", expansion}};
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
+getBinaryStringSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
+      {"encloseNodes", encloseNodes}}; // Parsed name matches the C++ API name.
+  return M;
+}
+
+static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
+getBinaryRangeSelectors() {
+  static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
+      M = {{"enclose", enclose}}; // Parsed name matches the C++ API name.
+  return M;
+}
+
+template <typename Element>
+llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
+                                     llvm::StringRef Key) {
+  auto Entry = Map.find(Key); // Single lookup; `end()` means no such key.
+  if (Entry != Map.end())
+    return Entry->second;
+  return llvm::None; // Miss: the caller tries the next selector table.
+}
+
+template <typename ResultType>
+ParseProgress<ResultType> makeParseProgress(ParseState State,
+                                            ResultType Result) {
+  return ParseProgress<ResultType>{State, std::move(Result)};
+}
+
+static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
+  size_t Pos = S.OriginalInput.size() - S.Input.size();
+  return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
+                                      S.OriginalInput.substr(Pos, 20).str());
+}
+
+// Returns a new ParseState that advances \c S by \c N characters.
+static ParseState advance(ParseState S, size_t N) {
+  S.Input = S.Input.drop_front(N);
+  return S;
+}
+
+static StringRef consumeWhitespace(StringRef S) {
+  return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
+}
+
+// Parses a single expected character \c c from \c State, skipping preceding
+// whitespace.  Error if the expected character isn't found.
+static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty() || State.Input.front() != c)
+    return makeParseError(State,
+                          ("expected char not found: " + llvm::Twine(c)).str());
+  return makeParseProgress(advance(State, 1), llvm::None);
+}
+
+// Parses an identifier "token" -- handles preceding whitespace.
+static ExpectedProgress<std::string> parseId(ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  auto Id = State.Input.take_while(
+      [](char c) { return c >= 0 && isIdentifierBody(c); });
+  if (Id.empty())
+    return makeParseError(State, "failed to parse name");
+  return makeParseProgress(advance(State, Id.size()), Id.str());
+}
+
+// For consistency with the AST matcher parser and C++ code, node ids are
+// written as strings. However, we do not support escaping in the string.
+static ExpectedProgress<std::string> parseStringId(ParseState State) {
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty())
+    return makeParseError(State, "unexpected end of input");
+  if (!State.Input.consume_front("\""))
+    return makeParseError(
+        State,
+        "expecting string, but encountered other character or end of input");
+
+  StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
+  if (State.Input.size() == Id.size())
+    return makeParseError(State, "unterminated string");
+  // Advance past the trailing quote as well.
+  return makeParseProgress(advance(State, Id.size() + 1), Id.str());
+}
+
+// Parses a single element surrounded by parens. `Op` is applied to the parsed
+// result to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
+                                            RangeSelectorOp<T> Op,
+                                            ParseState State) {
+  auto P = parseChar('(', State);
+  if (!P)
+    return P.takeError();
+
+  auto E = ParseElement(P->State);
+  if (!E)
+    return E.takeError();
+
+  P = parseChar(')', E->State);
+  if (!P)
+    return P.takeError();
+
+  return makeParseProgress(P->State, Op(std::move(E->Value)));
+}
+
+// Parses a pair of elements surrounded by parens and separated by comma. `Op`
+// is applied to the parsed results to create the result of this function call.
+template <typename T>
+ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
+                                          RangeSelectorOp<T, T> Op,
+                                          ParseState State) {
+  auto P = parseChar('(', State);
+  if (!P)
+    return P.takeError();
+
+  auto Left = ParseElement(P->State);
+  if (!Left)
+    return Left.takeError();
+
+  P = parseChar(',', Left->State);
+  if (!P)
+    return P.takeError();
+
+  auto Right = ParseElement(P->State);
+  if (!Right)
+    return Right.takeError();
+
+  P = parseChar(')', Right->State);
+  if (!P)
+    return P.takeError();
+
+  return makeParseProgress(P->State,
+                           Op(std::move(Left->Value), std::move(Right->Value)));
+}
+
+// Parses a single range selector: a selector name followed by its
+// parenthesized argument(s) -- string node ids or nested selectors. Returns
+// the resulting `RangeSelector` on success and a parse error otherwise.
+static ExpectedProgress<RangeSelector>
+parseRangeSelectorImpl(ParseState State) {
+  auto Id = parseId(State);
+  if (!Id)
+    return Id.takeError();
+
+  std::string OpName = std::move(Id->Value);
+  if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
+    return parseSingle(parseStringId, *Op, Id->State);
+
+  if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
+    return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
+
+  if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
+    return parsePair(parseStringId, *Op, Id->State);
+
+  if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
+    return parsePair(parseRangeSelectorImpl, *Op, Id->State);
+
+  return makeParseError(State, "unknown selector name: " + OpName);
+}
+
+Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
+  ParseState State = {Input, Input};
+  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
+  if (!Result)
+    return Result.takeError();
+  State = Result->State;
+  // Discard any potentially trailing whitespace.
+  State.Input = consumeWhitespace(State.Input);
+  if (State.Input.empty())
+    return Result->Value;
+  return makeParseError(State, "unexpected input after selector");
+}
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 9f81423c9022b..29b1a5b0372ea 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -23,8 +23,6 @@ using namespace clang;
 using namespace transformer;
 
 using ast_matchers::MatchFinder;
-using ast_type_traits::ASTNodeKind;
-using ast_type_traits::DynTypedNode;
 using llvm::Error;
 using llvm::StringError;
 
@@ -148,7 +146,7 @@ RangeSelector transformer::statement(std::string ID) {
   };
 }
 
-RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) {
+RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
   return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<CharSourceRange> BeginRange = Begin(Result);
     if (!BeginRange)
@@ -167,8 +165,9 @@ RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) {
   };
 }
 
-RangeSelector transformer::range(std::string BeginID, std::string EndID) {
-  return transformer::range(node(std::move(BeginID)), node(std::move(EndID)));
+RangeSelector transformer::encloseNodes(std::string BeginID,
+                                        std::string EndID) {
+  return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
 }
 
 RangeSelector transformer::member(std::string ID) {
diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp
index 20d3a371950af..995bec03cd669 100644
--- a/clang/lib/Tooling/Transformer/RewriteRule.cpp
+++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp
@@ -25,16 +25,14 @@ using namespace transformer;
 
 using ast_matchers::MatchFinder;
 using ast_matchers::internal::DynTypedMatcher;
-using ast_type_traits::ASTNodeKind;
 
 using MatchResult = MatchFinder::MatchResult;
 
-Expected<SmallVector<transformer::detail::Transformation, 1>>
-transformer::detail::translateEdits(const MatchResult &Result,
-                                llvm::ArrayRef<ASTEdit> Edits) {
-  SmallVector<transformer::detail::Transformation, 1> Transformations;
-  for (const auto &Edit : Edits) {
-    Expected<CharSourceRange> Range = Edit.TargetRange(Result);
+static Expected<SmallVector<transformer::Edit, 1>>
+translateEdits(const MatchResult &Result, ArrayRef<ASTEdit> ASTEdits) {
+  SmallVector<transformer::Edit, 1> Edits;
+  for (const auto &E : ASTEdits) {
+    Expected<CharSourceRange> Range = E.TargetRange(Result);
     if (!Range)
       return Range.takeError();
     llvm::Optional<CharSourceRange> EditRange =
@@ -42,21 +40,34 @@ transformer::detail::translateEdits(const MatchResult &Result,
     // FIXME: let user specify whether to treat this case as an error or ignore
     // it as is currently done.
     if (!EditRange)
-      return SmallVector<Transformation, 0>();
-    auto Replacement = Edit.Replacement->eval(Result);
+      return SmallVector<Edit, 0>();
+    auto Replacement = E.Replacement->eval(Result);
     if (!Replacement)
       return Replacement.takeError();
-    transformer::detail::Transformation T;
+    transformer::Edit T;
     T.Range = *EditRange;
     T.Replacement = std::move(*Replacement);
-    Transformations.push_back(std::move(T));
+    T.Metadata = E.Metadata;
+    Edits.push_back(std::move(T));
   }
-  return Transformations;
+  return Edits;
 }
 
-ASTEdit transformer::changeTo(RangeSelector S, TextGenerator Replacement) {
+EditGenerator transformer::editList(SmallVector<ASTEdit, 1> Edits) {
+  return [Edits = std::move(Edits)](const MatchResult &Result) {
+    return translateEdits(Result, Edits);
+  };
+}
+
+EditGenerator transformer::edit(ASTEdit Edit) {
+  return [Edit = std::move(Edit)](const MatchResult &Result) {
+    return translateEdits(Result, {Edit});
+  };
+}
+
+ASTEdit transformer::changeTo(RangeSelector Target, TextGenerator Replacement) {
   ASTEdit E;
-  E.TargetRange = std::move(S);
+  E.TargetRange = std::move(Target);
   E.Replacement = std::move(Replacement);
   return E;
 }
@@ -83,8 +94,9 @@ ASTEdit transformer::remove(RangeSelector S) {
   return change(std::move(S), std::make_shared<SimpleTextGenerator>(""));
 }
 
-RewriteRule transformer::makeRule(DynTypedMatcher M, SmallVector<ASTEdit, 1> Edits,
-                              TextGenerator Explanation) {
+RewriteRule transformer::makeRule(ast_matchers::internal::DynTypedMatcher M,
+                                  EditGenerator Edits,
+                                  TextGenerator Explanation) {
   return RewriteRule{{RewriteRule::Case{
       std::move(M), std::move(Edits), std::move(Explanation), {}}}};
 }
@@ -105,10 +117,13 @@ static bool hasValidKind(const DynTypedMatcher &M) {
 #endif
 
 // Binds each rule's matcher to a unique (and deterministic) tag based on
-// `TagBase` and the id paired with the case.
+// `TagBase` and the id paired with the case. All of the returned matchers have
+// their traversal kind explicitly set, either based on a pre-set kind or to the
+// provided `DefaultTraversalKind`.
 static std::vector<DynTypedMatcher> taggedMatchers(
     StringRef TagBase,
-    const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases) {
+    const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases,
+    ast_type_traits::TraversalKind DefaultTraversalKind) {
   std::vector<DynTypedMatcher> Matchers;
   Matchers.reserve(Cases.size());
   for (const auto &Case : Cases) {
@@ -116,8 +131,10 @@ static std::vector<DynTypedMatcher> taggedMatchers(
     // HACK: Many matchers are not bindable, so ensure that tryBind will work.
     DynTypedMatcher BoundMatcher(Case.second.Matcher);
     BoundMatcher.setAllowBind(true);
-    auto M = BoundMatcher.tryBind(Tag);
-    Matchers.push_back(*std::move(M));
+    auto M = *BoundMatcher.tryBind(Tag);
+    Matchers.push_back(!M.getTraversalKind()
+                           ? M.withTraversalKind(DefaultTraversalKind)
+                           : std::move(M));
   }
   return Matchers;
 }
@@ -147,14 +164,21 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) {
     Buckets[Cases[I].Matcher.getSupportedKind()].emplace_back(I, Cases[I]);
   }
 
+  // Each anyOf explicitly controls the traversal kind. The anyOf itself is set
+  // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind
+  // of the branches. Then, each branch is either left as is, if the kind is
+  // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. We
+  // choose this setting, because we think it is the one most friendly to
+  // beginners, who are (largely) the target audience of Transformer.
   std::vector<DynTypedMatcher> Matchers;
   for (const auto &Bucket : Buckets) {
     DynTypedMatcher M = DynTypedMatcher::constructVariadic(
         DynTypedMatcher::VO_AnyOf, Bucket.first,
-        taggedMatchers("Tag", Bucket.second));
+        taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource));
     M.setAllowBind(true);
     // `tryBind` is guaranteed to succeed, because `AllowBind` was set to true.
-    Matchers.push_back(*M.tryBind(RewriteRule::RootID));
+    Matchers.push_back(
+        M.tryBind(RewriteRule::RootID)->withTraversalKind(TK_AsIs));
   }
   return Matchers;
 }
diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 836401d1e605c..26b204851f058 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -10,10 +10,24 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Tooling/Transformer/SourceCode.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <set>
 
 using namespace clang;
 
+using llvm::errc;
+using llvm::StringError;
+
 StringRef clang::tooling::getText(CharSourceRange Range,
                                   const ASTContext &Context) {
   return Lexer::getSourceText(Range, Context.getSourceManager(),
@@ -23,11 +37,45 @@ StringRef clang::tooling::getText(CharSourceRange Range,
 CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
                                                  tok::TokenKind Next,
                                                  ASTContext &Context) {
-  Optional<Token> Tok = Lexer::findNextToken(
-      Range.getEnd(), Context.getSourceManager(), Context.getLangOpts());
-  if (!Tok || !Tok->is(Next))
+  CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(),
+                                            Context.getLangOpts());
+  if (R.isInvalid())
+    return Range;
+  Token Tok;
+  bool Err =
+      Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(),
+                         Context.getLangOpts(), /*IgnoreWhiteSpace=*/true);
+  if (Err || !Tok.is(Next))
     return Range;
-  return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation());
+  return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation());
+}
+
+llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
+                                              const SourceManager &SM) {
+  if (Range.isInvalid())
+    return llvm::make_error<StringError>(errc::invalid_argument,
+                                         "Invalid range");
+
+  if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+    return llvm::make_error<StringError>(
+        errc::invalid_argument, "Range starts or ends in a macro expansion");
+
+  if (SM.isInSystemHeader(Range.getBegin()) ||
+      SM.isInSystemHeader(Range.getEnd()))
+    return llvm::make_error<StringError>(errc::invalid_argument,
+                                         "Range is in system header");
+
+  std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
+  std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
+  if (BeginInfo.first != EndInfo.first)
+    return llvm::make_error<StringError>(
+        errc::invalid_argument, "Range begins and ends in different files");
+
+  if (BeginInfo.second > EndInfo.second)
+    return llvm::make_error<StringError>(
+        errc::invalid_argument, "Range's begin is past its end");
+
+  return llvm::Error::success();
 }
 
 llvm::Optional<CharSourceRange>
@@ -46,20 +94,308 @@ clang::tooling::getRangeForEdit(const CharSourceRange &EditRange,
   //    foo(DO_NOTHING(6))
   // Decide whether the current behavior is desirable and modify if not.
   CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
-  if (Range.isInvalid())
-    return None;
+  bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM));
+  if (IsInvalid)
+    return llvm::None;
+  return Range;
 
-  if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
-    return None;
-  if (SM.isInSystemHeader(Range.getBegin()) ||
-      SM.isInSystemHeader(Range.getEnd()))
-    return None;
+}
 
-  std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
-  std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
-  if (BeginInfo.first != EndInfo.first ||
-      BeginInfo.second > EndInfo.second)
-    return None;
+static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
+  return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
+}
 
-  return Range;
+static bool contains(const std::set<tok::TokenKind> &Terminators,
+                     const Token &Tok) {
+  return Terminators.count(Tok.getKind()) > 0;
+}
+
+// Returns the exclusive, *file* end location of the entity whose last token is
+// at location 'EntityLast'. That is, it returns the location one past the last
+// relevant character.
+//
+// Associated tokens include comments, horizontal whitespace and 'Terminators'
+// -- optional tokens, which, if any are found, will be included; if
+// 'Terminators' is empty, we will not include any extra tokens beyond comments
+// and horizontal whitespace.
+static SourceLocation
+getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
+                const std::set<tok::TokenKind> &Terminators,
+                const LangOptions &LangOpts) {
+  assert(EntityLast.isValid() && "Invalid end location found.");
+
+  // We remember the last location of a non-horizontal-whitespace token we have
+  // lexed; this is the location up to which we will want to delete.
+  // FIXME: Support using the spelling loc here for cases where we want to
+  // analyze the macro text.
+
+  CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
+  // FIXME: Should check isTokenRange(), for the (rare) case that
+  // `ExpansionRange` is a character range.
+  std::unique_ptr<Lexer> Lexer = [&]() {
+    bool Invalid = false;
+    auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
+    llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
+    assert(!Invalid && "Cannot get file/offset");
+    return std::make_unique<clang::Lexer>(
+        SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
+        File.data() + FileOffset.second, File.end());
+  }();
+
+  // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
+  Lexer->SetKeepWhitespaceMode(true);
+
+  // Generally, the code we want to include looks like this ([] are optional),
+  // If Terminators is empty:
+  //   [ <comment> ] [ <newline> ]
+  // Otherwise:
+  //   ... <terminator> [ <comment> ] [ <newline> ]
+
+  Token Tok;
+  bool Terminated = false;
+
+  // First, lex to the current token (which is the last token of the range that
+  // is definitely associated with the decl). Then, we process the first token
+  // separately from the rest based on conditions that hold specifically for
+  // that first token.
+  //
+  // We do not search for a terminator if none is required or we've already
+  // encountered it. Otherwise, if the original `EntityLast` location was in a
+  // macro expansion, we don't have visibility into the text, so we assume we've
+  // already terminated. However, we note this assumption with
+  // `TerminatedByMacro`, because we'll want to handle it somewhat differently
+  // for the terminators semicolon and comma. These terminators can be safely
+  // associated with the entity when they appear after the macro -- extra
+  // semicolons have no effect on the program and a well-formed program won't
+  // have multiple commas in a row, so we're guaranteed that there is only one.
+  //
+  // FIXME: This handling of macros is more conservative than necessary. When
+  // the end of the expansion coincides with the end of the node, we can still
+  // safely analyze the code. But, it is more complicated, because we need to
+  // start by lexing the spelling loc for the first token and then switch to the
+  // expansion loc.
+  bool TerminatedByMacro = false;
+  Lexer->LexFromRawLexer(Tok);
+  if (Terminators.empty() || contains(Terminators, Tok))
+    Terminated = true;
+  else if (EntityLast.isMacroID()) {
+    Terminated = true;
+    TerminatedByMacro = true;
+  }
+
+  // We save the most recent candidate for the exclusive end location.
+  SourceLocation End = Tok.getEndLoc();
+
+  while (!Terminated) {
+    // Lex the next token we want to possibly expand the range with.
+    Lexer->LexFromRawLexer(Tok);
+
+    switch (Tok.getKind()) {
+    case tok::eof:
+    // Unexpected separators.
+    case tok::l_brace:
+    case tok::r_brace:
+    case tok::comma:
+      return End;
+    // Whitespace pseudo-tokens.
+    case tok::unknown:
+      if (startsWithNewline(SM, Tok))
+        // Include at least until the end of the line.
+        End = Tok.getEndLoc();
+      break;
+    default:
+      if (contains(Terminators, Tok))
+        Terminated = true;
+      End = Tok.getEndLoc();
+      break;
+    }
+  }
+
+  do {
+    // Lex the next token we want to possibly expand the range with.
+    Lexer->LexFromRawLexer(Tok);
+
+    switch (Tok.getKind()) {
+    case tok::unknown:
+      if (startsWithNewline(SM, Tok))
+        // We're done, but include this newline.
+        return Tok.getEndLoc();
+      break;
+    case tok::comment:
+      // Include any comments we find on the way.
+      End = Tok.getEndLoc();
+      break;
+    case tok::semi:
+    case tok::comma:
+      if (TerminatedByMacro && contains(Terminators, Tok)) {
+        End = Tok.getEndLoc();
+        // We've found a real terminator.
+        TerminatedByMacro = false;
+        break;
+      }
+      // Found an unrelated token; stop and don't include it.
+      return End;
+    default:
+      // Found an unrelated token; stop and don't include it.
+      return End;
+    }
+  } while (true);
+}
+
+// Returns the expected terminator tokens for the given declaration.
+//
+// If we do not know the correct terminator token, returns an empty set.
+//
+// There are cases where we have more than one possible terminator (for example,
+// we find either a comma or a semicolon after a VarDecl).
+static std::set<tok::TokenKind> getTerminators(const Decl &D) {
+  if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
+    return {tok::semi};
+
+  if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
+    return {tok::r_brace, tok::semi};
+
+  if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
+    return {tok::comma, tok::semi};
+
+  return {};
+}
+
+// Starting from `Loc`, skips horizontal whitespace and then at most one
+// vertical-whitespace character. Returns the location immediately after the
+// skipped text. NOTE(review): a "\r\n" pair is only half consumed -- confirm.
+static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
+                                               SourceLocation Loc,
+                                               const LangOptions &LangOpts) {
+  const char *LocChars = SM.getCharacterData(Loc);
+  int i = 0;
+  while (isHorizontalWhitespace(LocChars[i]))
+    ++i;
+  if (isVerticalWhitespace(LocChars[i]))
+    ++i;
+  return Loc.getLocWithOffset(i);
+}
+
+// Is `Loc` separated from any following decl by something meaningful (e.g. an
+// empty line, a comment), ignoring horizontal whitespace?  Since this is a
+// heuristic, we return false when in doubt.  `Loc` cannot be the first location
+// in the file.
+static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
+                                 const LangOptions &LangOpts) {
+  // If the preceding character is a newline, we'll check for an empty line as a
+  // separator. However, we can't identify an empty line using tokens, so we
+  // analyse the characters. If we try to use tokens, we'll just end up with a
+  // whitespace token, whose characters we'd have to analyse anyhow.
+  bool Invalid = false;
+  const char *LocChars =
+      SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
+  assert(!Invalid &&
+         "Loc must be a valid character and not the first of the source file.");
+  if (isVerticalWhitespace(LocChars[0])) {
+    for (int i = 1; isWhitespace(LocChars[i]); ++i)
+      if (isVerticalWhitespace(LocChars[i]))
+        return true;
+  }
+  // We didn't find an empty line, so lex the next token, skipping past any
+  // whitespace we just scanned.
+  Token Tok;
+  bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
+                                   /*IgnoreWhiteSpace=*/true);
+  if (Failed)
+    // Any text that confuses the lexer seems fair to consider a separation.
+    return true;
+
+  switch (Tok.getKind()) {
+  case tok::comment:
+  case tok::l_brace:
+  case tok::r_brace:
+  case tok::eof:
+    return true;
+  default:
+    return false;
+  }
+}
+
+CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
+                                            ASTContext &Context) {
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
+
+  // First, expand to the start of the template<> declaration if necessary.
+  if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
+    if (const auto *T = Record->getDescribedClassTemplate())
+      if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+        Range.setBegin(T->getBeginLoc());
+  } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
+    if (const auto *T = F->getDescribedFunctionTemplate())
+      if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
+        Range.setBegin(T->getBeginLoc());
+  }
+
+  // Next, expand the end location past trailing comments to include a potential
+  // newline at the end of the decl's line.
+  Range.setEnd(
+      getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
+  Range.setTokenRange(false);
+
+  // Expand to include preceding associated comments. We ignore any comments
+  // that are not preceding the decl, since we've already skipped trailing
+  // comments with getEntityEndLoc.
+  if (const RawComment *Comment =
+          Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
+    // Only include a preceding comment if:
+    // * it is *not* separate from the declaration (not including any newline
+    //   that immediately follows the comment),
+    // * the decl *is* separate from any following entity (so, there are no
+    //   other entities the comment could refer to), and
+    // * it is not an IfThisThenThat lint check.
+    if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
+                                     Range.getBegin()) &&
+        !atOrBeforeSeparation(
+            SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
+            LangOpts) &&
+        atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
+      const StringRef CommentText = Comment->getRawText(SM);
+      if (!CommentText.contains("LINT.IfChange") &&
+          !CommentText.contains("LINT.ThenChange"))
+        Range.setBegin(Comment->getBeginLoc());
+    }
+  // Add leading attributes.
+  for (auto *Attr : Decl.attrs()) {
+    if (Attr->getLocation().isInvalid() ||
+        !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
+      continue;
+    Range.setBegin(Attr->getLocation());
+
+    // Extend to the left '[[' or '__attribute((' if we saw the attribute,
+    // unless it is not a valid location.
+    bool Invalid = false;
+    StringRef Source =
+        SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
+    if (Invalid)
+      continue;
+    llvm::StringRef BeforeAttr =
+        Source.substr(0, SM.getFileOffset(Range.getBegin()));
+    llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
+
+    for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
+      // Handle whitespace between attribute prefix and attribute value.
+      if (BeforeAttrStripped.endswith(Prefix)) {
+        // Move start to start position of prefix, which is
+        // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
+        // positions to the left.
+        Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
+            -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
+        break;
+        // If we didn't see '[[' or '__attribute' it's probably coming from a
+        // macro expansion which is already handled by makeFileCharRange(),
+        // below.
+      }
+    }
+  }
+
+  // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
+  // Range.getBegin() may be inside an expansion.
+  return Lexer::makeFileCharRange(Range, SM, LangOpts);
 }
diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp
index 8710e3cdf60f3..2670bf7adabf3 100644
--- a/clang/lib/Tooling/Transformer/Stencil.cpp
+++ b/clang/lib/Tooling/Transformer/Stencil.cpp
@@ -12,12 +12,14 @@
 #include "clang/AST/Expr.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
 #include "clang/Tooling/Transformer/SourceCodeBuilders.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
 #include <atomic>
 #include <memory>
 #include <string>
@@ -26,7 +28,6 @@ using namespace clang;
 using namespace transformer;
 
 using ast_matchers::MatchFinder;
-using ast_type_traits::DynTypedNode;
 using llvm::errc;
 using llvm::Error;
 using llvm::Expected;
@@ -81,14 +82,14 @@ struct SelectorData {
 // A stencil operation to build a member access `e.m` or `e->m`, as appropriate.
 struct AccessData {
   AccessData(StringRef BaseId, Stencil Member)
-      : BaseId(BaseId), Member(std::move(Member)) {}
+      : BaseId(std::string(BaseId)), Member(std::move(Member)) {}
   std::string BaseId;
   Stencil Member;
 };
 
 struct IfBoundData {
   IfBoundData(StringRef Id, Stencil TrueStencil, Stencil FalseStencil)
-      : Id(Id), TrueStencil(std::move(TrueStencil)),
+      : Id(std::string(Id)), TrueStencil(std::move(TrueStencil)),
         FalseStencil(std::move(FalseStencil)) {}
   std::string Id;
   Stencil TrueStencil;
@@ -227,10 +228,37 @@ Error evalData(const UnaryOperationData &Data,
 
 Error evalData(const SelectorData &Data, const MatchFinder::MatchResult &Match,
                std::string *Result) {
-  auto Range = Data.Selector(Match);
-  if (!Range)
-    return Range.takeError();
-  *Result += tooling::getText(*Range, *Match.Context);
+  auto RawRange = Data.Selector(Match);
+  if (!RawRange)
+    return RawRange.takeError();
+  CharSourceRange Range = Lexer::makeFileCharRange(
+      *RawRange, *Match.SourceManager, Match.Context->getLangOpts());
+  if (Range.isInvalid()) {
+    // Validate the original range to attempt to get a meaningful error message.
+    // If it's valid, then something else is the cause and we just return the
+    // generic failure message.
+    if (auto Err = tooling::validateEditRange(*RawRange, *Match.SourceManager))
+      return handleErrors(std::move(Err), [](std::unique_ptr<StringError> E) {
+        assert(E->convertToErrorCode() ==
+                   llvm::make_error_code(errc::invalid_argument) &&
+               "Validation errors must carry the invalid_argument code");
+        return llvm::createStringError(
+            errc::invalid_argument,
+            "selected range could not be resolved to a valid source range; " +
+                E->getMessage());
+      });
+    return llvm::createStringError(
+        errc::invalid_argument,
+        "selected range could not be resolved to a valid source range");
+  }
+  // Validate `Range`, because `makeFileCharRange` accepts some ranges that
+  // `validateEditRange` rejects.
+  if (auto Err = tooling::validateEditRange(Range, *Match.SourceManager))
+    return joinErrors(
+        llvm::createStringError(errc::invalid_argument,
+                                "selected range is not valid for editing"),
+        std::move(Err));
+  *Result += tooling::getText(Range, *Match.Context);
   return Error::success();
 }
 
@@ -294,47 +322,41 @@ public:
 };
 } // namespace
 
-Stencil transformer::detail::makeStencil(StringRef Text) { return text(Text); }
-
-Stencil transformer::detail::makeStencil(RangeSelector Selector) {
-  return selection(std::move(Selector));
+Stencil transformer::detail::makeStencil(StringRef Text) {
+  return std::make_shared<StencilImpl<RawTextData>>(std::string(Text));
 }
 
-Stencil transformer::text(StringRef Text) {
-  return std::make_shared<StencilImpl<RawTextData>>(Text);
-}
-
-Stencil transformer::selection(RangeSelector Selector) {
+Stencil transformer::detail::makeStencil(RangeSelector Selector) {
   return std::make_shared<StencilImpl<SelectorData>>(std::move(Selector));
 }
 
 Stencil transformer::dPrint(StringRef Id) {
-  return std::make_shared<StencilImpl<DebugPrintNodeData>>(Id);
+  return std::make_shared<StencilImpl<DebugPrintNodeData>>(std::string(Id));
 }
 
 Stencil transformer::expression(llvm::StringRef Id) {
   return std::make_shared<StencilImpl<UnaryOperationData>>(
-      UnaryNodeOperator::Parens, Id);
+      UnaryNodeOperator::Parens, std::string(Id));
 }
 
 Stencil transformer::deref(llvm::StringRef ExprId) {
   return std::make_shared<StencilImpl<UnaryOperationData>>(
-      UnaryNodeOperator::Deref, ExprId);
+      UnaryNodeOperator::Deref, std::string(ExprId));
 }
 
 Stencil transformer::maybeDeref(llvm::StringRef ExprId) {
   return std::make_shared<StencilImpl<UnaryOperationData>>(
-      UnaryNodeOperator::MaybeDeref, ExprId);
+      UnaryNodeOperator::MaybeDeref, std::string(ExprId));
 }
 
 Stencil transformer::addressOf(llvm::StringRef ExprId) {
   return std::make_shared<StencilImpl<UnaryOperationData>>(
-      UnaryNodeOperator::AddressOf, ExprId);
+      UnaryNodeOperator::AddressOf, std::string(ExprId));
 }
 
 Stencil transformer::maybeAddressOf(llvm::StringRef ExprId) {
   return std::make_shared<StencilImpl<UnaryOperationData>>(
-      UnaryNodeOperator::MaybeAddressOf, ExprId);
+      UnaryNodeOperator::MaybeAddressOf, std::string(ExprId));
 }
 
 Stencil transformer::access(StringRef BaseId, Stencil Member) {
diff --git a/clang/lib/Tooling/Transformer/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp
index 71f0646f4c0eb..e8fc00c4e953f 100644
--- a/clang/lib/Tooling/Transformer/Transformer.cpp
+++ b/clang/lib/Tooling/Transformer/Transformer.cpp
@@ -12,6 +12,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Refactoring/AtomicChange.h"
 #include "llvm/Support/Error.h"
+#include <map>
 #include <utility>
 #include <vector>
 
@@ -31,7 +32,7 @@ void Transformer::run(const MatchFinder::MatchResult &Result) {
 
   transformer::RewriteRule::Case Case =
       transformer::detail::findSelectedCase(Result, Rule);
-  auto Transformations = transformer::detail::translateEdits(Result, Case.Edits);
+  auto Transformations = Case.Edits(Result);
   if (!Transformations) {
     Consumer(Transformations.takeError());
     return;
@@ -45,28 +46,39 @@ void Transformer::run(const MatchFinder::MatchResult &Result) {
     return;
   }
 
-  // Record the results in the AtomicChange, anchored at the location of the
-  // first change.
-  AtomicChange AC(*Result.SourceManager,
-                  (*Transformations)[0].Range.getBegin());
+  // Group the transformations, by file, into AtomicChanges, each anchored by
+  // the location of the first change in that file.
+  std::map<FileID, AtomicChange> ChangesByFileID;
   for (const auto &T : *Transformations) {
+    auto ID = Result.SourceManager->getFileID(T.Range.getBegin());
+    auto Iter = ChangesByFileID
+                    .emplace(ID, AtomicChange(*Result.SourceManager,
+                                              T.Range.getBegin(), T.Metadata))
+                    .first;
+    auto &AC = Iter->second;
     if (auto Err = AC.replace(*Result.SourceManager, T.Range, T.Replacement)) {
       Consumer(std::move(Err));
       return;
     }
   }
 
-  for (const auto &I : Case.AddedIncludes) {
-    auto &Header = I.first;
-    switch (I.second) {
-    case transformer::IncludeFormat::Quoted:
-      AC.addHeader(Header);
-      break;
-    case transformer::IncludeFormat::Angled:
-      AC.addHeader((llvm::Twine("<") + Header + ">").str());
-      break;
+  for (auto &IDChangePair : ChangesByFileID) {
+    auto &AC = IDChangePair.second;
+    // FIXME: this will add includes to *all* changed files, which may not be
+    // the intent. We should upgrade the representation to allow associating
+    // headers with specific edits.
+    for (const auto &I : Case.AddedIncludes) {
+      auto &Header = I.first;
+      switch (I.second) {
+      case transformer::IncludeFormat::Quoted:
+        AC.addHeader(Header);
+        break;
+      case transformer::IncludeFormat::Angled:
+        AC.addHeader((llvm::Twine("<") + Header + ">").str());
+        break;
+      }
     }
-  }
 
-  Consumer(std::move(AC));
+    Consumer(std::move(AC));
+  }
 }