Diffstat (limited to 'clang/lib/Tooling/Syntax')
-rw-r--r-- | clang/lib/Tooling/Syntax/BuildTree.cpp | 1108
-rw-r--r-- | clang/lib/Tooling/Syntax/Mutations.cpp | 2
-rw-r--r-- | clang/lib/Tooling/Syntax/Nodes.cpp | 281
-rw-r--r-- | clang/lib/Tooling/Syntax/Tokens.cpp | 609
-rw-r--r-- | clang/lib/Tooling/Syntax/Tree.cpp | 23
5 files changed, 1598 insertions, 425 deletions
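
The patch below extends clang's experimental syntax-tree library: BuildTree.cpp grows handling for declarators, literals, operators and templates, and Nodes.cpp adds typed accessors for the new node kinds. For orientation only, a minimal consumer of this API might look like the sketch below. It assumes the buildSyntaxTree() entry point declared in clang/Tooling/Syntax/BuildTree.h at the time of this change and an already-populated syntax::Arena; the helper name dumpMainFileSyntaxTree is made up for illustration and is not part of the patch.

// Illustrative sketch, not part of this change. Exact signatures may differ
// between revisions of the library.
#include "clang/AST/ASTContext.h"
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/Support/raw_ostream.h"

void dumpMainFileSyntaxTree(clang::ASTContext &Ctx, clang::syntax::Arena &Arena) {
  // Build the syntax tree for the whole translation unit and print it.
  clang::syntax::TranslationUnit *Root =
      clang::syntax::buildSyntaxTree(Arena, *Ctx.getTranslationUnitDecl());
  llvm::errs() << Root->dump(Arena);
  // The typed accessors added in Nodes.cpp (e.g.
  // syntax::BinaryOperatorExpression::lhs() / rhs()) can then be used to
  // inspect individual constructs in the tree.
}
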
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index aa8844771d373..1f192180ec451 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,20 +6,32 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/ASTFwd.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include "clang/AST/TypeLoc.h" +#include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -27,6 +39,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <cstddef> #include <map> using namespace clang; @@ -34,6 +47,207 @@ using namespace clang; LLVM_ATTRIBUTE_UNUSED static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } +namespace { +/// Get start location of the Declarator from the TypeLoc. +/// E.g.: +/// loc of `(` in `int (a)` +/// loc of `*` in `int *(a)` +/// loc of the first `(` in `int (*a)(int)` +/// loc of the `*` in `int *(a)(int)` +/// loc of the first `*` in `const int *const *volatile a;` +/// +/// It is non-trivial to get the start location because TypeLocs are stored +/// inside out. In the example above `*volatile` is the TypeLoc returned +/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` +/// returns. +struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { + SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { + auto L = Visit(T.getInnerLoc()); + if (L.isValid()) + return L; + return T.getLParenLoc(); + } + + // Types spelled in the prefix part of the declarator. + SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { + return HandlePointer(T); + } + + // All other cases are not important, as they are either part of declaration + // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on + // existing declarators (e.g. QualifiedTypeLoc). They cannot start the + // declarator themselves, but their underlying type can. 
+ SourceLocation VisitTypeLoc(TypeLoc T) { + auto N = T.getNextTypeLoc(); + if (!N) + return SourceLocation(); + return Visit(N); + } + + SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { + if (T.getTypePtr()->hasTrailingReturn()) + return SourceLocation(); // avoid recursing into the suffix of declarator. + return VisitTypeLoc(T); + } + +private: + template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { + auto L = Visit(T.getPointeeLoc()); + if (L.isValid()) + return L; + return T.getLocalSourceRange().getBegin(); + } +}; +} // namespace + +static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { + switch (E.getOperator()) { + // Comparison + case OO_EqualEqual: + case OO_ExclaimEqual: + case OO_Greater: + case OO_GreaterEqual: + case OO_Less: + case OO_LessEqual: + case OO_Spaceship: + // Assignment + case OO_Equal: + case OO_SlashEqual: + case OO_PercentEqual: + case OO_CaretEqual: + case OO_PipeEqual: + case OO_LessLessEqual: + case OO_GreaterGreaterEqual: + case OO_PlusEqual: + case OO_MinusEqual: + case OO_StarEqual: + case OO_AmpEqual: + // Binary computation + case OO_Slash: + case OO_Percent: + case OO_Caret: + case OO_Pipe: + case OO_LessLess: + case OO_GreaterGreater: + case OO_AmpAmp: + case OO_PipePipe: + case OO_ArrowStar: + case OO_Comma: + return syntax::NodeKind::BinaryOperatorExpression; + case OO_Tilde: + case OO_Exclaim: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + // Prefix/Postfix increment/decrement + case OO_PlusPlus: + case OO_MinusMinus: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::PostfixUnaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + // Operators that can be unary or binary + case OO_Plus: + case OO_Minus: + case OO_Star: + case OO_Amp: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::BinaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + return syntax::NodeKind::BinaryOperatorExpression; + // Not yet supported by SyntaxTree + case OO_New: + case OO_Delete: + case OO_Array_New: + case OO_Array_Delete: + case OO_Coawait: + case OO_Call: + case OO_Subscript: + case OO_Arrow: + return syntax::NodeKind::UnknownExpression; + case OO_Conditional: // not overloadable + case NUM_OVERLOADED_OPERATORS: + case OO_None: + llvm_unreachable("Not an overloadable operator"); + } + llvm_unreachable("Unknown OverloadedOperatorKind enum"); +} + +/// Gets the range of declarator as defined by the C++ grammar. E.g. +/// `int a;` -> range of `a`, +/// `int *a;` -> range of `*a`, +/// `int a[10];` -> range of `a[10]`, +/// `int a[1][2][3];` -> range of `a[1][2][3]`, +/// `int *a = nullptr` -> range of `*a = nullptr`. +/// FIMXE: \p Name must be a source range, e.g. for `operator+`. 
+static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, + SourceLocation Name, + SourceRange Initializer) { + SourceLocation Start = GetStartLoc().Visit(T); + SourceLocation End = T.getSourceRange().getEnd(); + assert(End.isValid()); + if (Name.isValid()) { + if (Start.isInvalid()) + Start = Name; + if (SM.isBeforeInTranslationUnit(End, Name)) + End = Name; + } + if (Initializer.isValid()) { + auto InitializerEnd = Initializer.getEnd(); + assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || + End == InitializerEnd); + End = InitializerEnd; + } + return SourceRange(Start, End); +} + +namespace { +/// All AST hierarchy roots that can be represented as pointers. +using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; +/// Maintains a mapping from AST to syntax tree nodes. This class will get more +/// complicated as we support more kinds of AST nodes, e.g. TypeLocs. +/// FIXME: expose this as public API. +class ASTToSyntaxMapping { +public: + void add(ASTPtr From, syntax::Tree *To) { + assert(To != nullptr); + assert(!From.isNull()); + + bool Added = Nodes.insert({From, To}).second; + (void)Added; + assert(Added && "mapping added twice"); + } + + syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } + +private: + llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; +}; +} // namespace + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -57,30 +271,44 @@ public: } llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } + const SourceManager &sourceManager() const { return Arena.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. - void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); - - /// Must be called with the range of each `DeclaratorDecl`. Ensures the - /// corresponding declarator nodes are covered by `SimpleDeclaration`. - void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + ASTPtr From) { + assert(New); + Pending.foldChildren(Arena, Range, New); + if (From) + Mapping.add(From, New); + } + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + TypeLoc L) { + // FIXME: add mapping for TypeLocs + foldNode(Range, New, nullptr); + } /// Notifies that we should not consume trailing semicolon when computing /// token range of \p D. - void noticeDeclaratorWithoutSemicolon(Decl *D); + void noticeDeclWithoutSemicolon(Decl *D); /// Mark the \p Child node with a corresponding \p Role. All marked children /// should be consumed by foldNode. - /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), - /// wraps expressions into expression statement. + /// When called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. void markStmtChild(Stmt *Child, NodeRole Role); /// Should be called for expressions in non-statement position to avoid /// wrapping into expression statement. void markExprChild(Expr *Child, NodeRole Role); - /// Set role for a token starting at \p Loc. void markChildToken(SourceLocation Loc, NodeRole R); + /// Set role for \p T. + void markChildToken(const syntax::Token *T, NodeRole R); + + /// Set role for \p N. + void markChild(syntax::Node *N, NodeRole R); + /// Set role for the syntax node matching \p N. + void markChild(ASTPtr N, NodeRole R); /// Finish building the tree and consume the root node. 
syntax::TranslationUnit *finalize() && { @@ -97,8 +325,16 @@ public: return TU; } - /// getRange() finds the syntax tokens corresponding to the passed source - /// locations. + /// Finds a token starting at \p L. The token must exist if \p L is valid. + const syntax::Token *findToken(SourceLocation L) const; + + /// Finds the syntax tokens corresponding to the \p SourceRange. + llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { + assert(Range.isValid()); + return getRange(Range.getBegin(), Range.getEnd()); + } + + /// Finds the syntax tokens corresponding to the passed source locations. /// \p First is the start position of the first token and \p Last is the start /// position of the last token. llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, @@ -109,23 +345,62 @@ public: Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } - llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { - auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); - if (llvm::isa<NamespaceDecl>(D)) - return Tokens; - if (DeclsWithoutSemicolons.count(D)) - return Tokens; - // FIXME: do not consume trailing semicolon on function definitions. - // Most declarations own a semicolon in syntax trees, but not in clang AST. - return withTrailingSemicolon(Tokens); + + llvm::ArrayRef<syntax::Token> + getTemplateRange(const ClassTemplateSpecializationDecl *D) const { + auto Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); } + + /// Returns true if \p D is the last declarator in a chain and is thus + /// reponsible for creating SimpleDeclaration for the whole chain. + template <class T> + bool isResponsibleForCreatingDeclaration(const T *D) const { + static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + const Decl *Next = D->getNextDeclInContext(); + + // There's no next sibling, this one is responsible. + if (Next == nullptr) { + return true; + } + const auto *NextT = llvm::dyn_cast<T>(Next); + + // Next sibling is not the same type, this one is responsible. + if (NextT == nullptr) { + return true; + } + // Next sibling doesn't begin at the same loc, it must be a different + // declaration, so this declarator is responsible. + if (NextT->getBeginLoc() != D->getBeginLoc()) { + return true; + } + + // NextT is a member of the same declaration, and we need the last member to + // create declaration. This one is not responsible. + return false; + } + + llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { + llvm::ArrayRef<clang::syntax::Token> Tokens; + // We want to drop the template parameters for specializations. + if (const auto *S = llvm::dyn_cast<TagDecl>(D)) + Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); + else + Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); + } + llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { - return getRange(E->getBeginLoc(), E->getEndLoc()); + return getRange(E->getSourceRange()); } + /// Find the adjusted range for the statement, consuming the trailing /// semicolon when needed. 
llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { - auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + auto Tokens = getRange(S->getSourceRange()); if (isa<CompoundStmt>(S)) return Tokens; @@ -138,17 +413,31 @@ public: private: llvm::ArrayRef<syntax::Token> + maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, + const Decl *D) const { + if (llvm::isa<NamespaceDecl>(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. + return withTrailingSemicolon(Tokens); + } + + llvm::ArrayRef<syntax::Token> withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { assert(!Tokens.empty()); assert(Tokens.back().kind() != tok::eof); - // (!) we never consume 'eof', so looking at the next token is ok. + // We never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; } - /// Finds a token starting at \p L. The token must exist. - const syntax::Token *findToken(SourceLocation L) const; + void setRole(syntax::Node *N, NodeRole R) { + assert(N->role() == NodeRole::Detached); + N->setRole(R); + } /// A collection of trees covering the input tokens. /// When created, each tree corresponds to a single token in the file. @@ -166,12 +455,10 @@ private: auto *L = new (A.allocator()) syntax::Leaf(&T); L->Original = true; L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); - Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + Trees.insert(Trees.end(), {&T, L}); } } - ~Forest() { assert(DelayedFolds.empty()); } - void assignRole(llvm::ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -181,56 +468,49 @@ private: assert((std::next(It) == Trees.end() || std::next(It)->first == Range.end()) && "no child with the specified range"); - It->second.Role = Role; + assert(It->second->role() == NodeRole::Detached && + "re-assigning role for a child"); + It->second->setRole(Role); } /// Add \p Node to the forest and attach child nodes based on \p Tokens. void foldChildren(const syntax::Arena &A, llvm::ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { - // Execute delayed folds inside `Tokens`. - auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); - auto It = BeginExecuted; - for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) - foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), - It->second.Node); - DelayedFolds.erase(BeginExecuted, It); - // Attach children to `Node`. - foldChildrenEager(A, Tokens, Node); - } + assert(Node->firstChild() == nullptr && "node already has children"); - /// Schedule a call to `foldChildren` that will only be executed when - /// containing node is folded. The range of delayed nodes can be extended by - /// calling `extendDelayedFold`. Only one delayed node for each starting - /// token is allowed. 
- void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(!Tokens.empty()); - bool Inserted = - DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) - .second; - (void)Inserted; - assert(Inserted && "Multiple delayed folds start at the same token"); - } + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); - /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends - /// its endpoint to `ExtendedRange.end()` and returns true. - /// Otherwise, returns false. - bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { - assert(!ExtendedRange.empty()); - auto It = DelayedFolds.find(ExtendedRange.data()); - if (It == DelayedFolds.end()) - return false; - assert(It->second.End <= ExtendedRange.end()); - It->second.End = ExtendedRange.end(); - return true; + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // We need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) { + auto *C = std::prev(It)->second; + if (C->role() == NodeRole::Detached) + C->setRole(NodeRole::Unknown); + Node->prependChildLowLevel(C); + } + + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, Node}); } // EXPECTS: all tokens were consumed and are owned by a single root node. syntax::Node *finalize() && { assert(Trees.size() == 1); - auto *Root = Trees.begin()->second.Node; + auto *Root = Trees.begin()->second; Trees = {}; return Root; } @@ -243,66 +523,19 @@ private: ? (std::next(It)->first - It->first) : A.tokenBuffer().expandedTokens().end() - It->first; - R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", - It->second.Node->kind(), - It->first->text(A.sourceManager()), CoveredTokens); - R += It->second.Node->dump(A); + R += std::string(llvm::formatv( + "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), + It->first->text(A.sourceManager()), CoveredTokens)); + R += It->second->dump(A); } return R; } private: - /// Implementation detail of `foldChildren`, does acutal folding ignoring - /// delayed folds. - void foldChildrenEager(const syntax::Arena &A, - llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = Tokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert((BeginChildren == Trees.end() || - BeginChildren->first == FirstToken) && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(Tokens.end()); - assert( - (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && - "fold crosses boundaries of existing subtrees"); - - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); - - // Mark that this node came from the AST and is backed by the source code. 
- Node->Original = true; - Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); - - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); - } - /// A with a role that should be assigned to it when adding to a parent. - struct NodeAndRole { - explicit NodeAndRole(syntax::Node *Node) - : Node(Node), Role(NodeRole::Unknown) {} - - syntax::Node *Node; - NodeRole Role; - }; - /// Maps from the start token to a subtree starting at that token. /// Keys in the map are pointers into the array of expanded tokens, so /// pointer order corresponds to the order of preprocessor tokens. - /// FIXME: storing the end tokens is redundant. - /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. - std::map<const syntax::Token *, NodeAndRole> Trees; - - /// See documentation of `foldChildrenDelayed` for details. - struct DelayedFold { - const syntax::Token *End = nullptr; - syntax::Tree *Node = nullptr; - }; - std::map<const syntax::Token *, DelayedFold> DelayedFolds; + std::map<const syntax::Token *, syntax::Node *> Trees; }; /// For debugging purposes. @@ -314,49 +547,91 @@ private: LocationToToken; Forest Pending; llvm::DenseSet<Decl *> DeclsWithoutSemicolons; + ASTToSyntaxMapping Mapping; }; namespace { class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { public: - explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) - : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} + explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) + : Builder(Builder), Context(Context) {} bool shouldTraversePostOrder() const { return true; } - bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { - // Ensure declarators are covered by SimpleDeclaration. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. - return true; + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { + return processDeclaratorAndDeclaration(DD); } - bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { - // Also a declarator. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. - return true; + + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { + return processDeclaratorAndDeclaration(TD); } bool VisitDecl(Decl *D) { assert(!D->isImplicit()); - Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::UnknownDeclaration()); + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::UnknownDeclaration(), D); + return true; + } + + // RAV does not call WalkUpFrom* on explicit instantiations, so we have to + // override Traverse. + // FIXME: make RAV call WalkUpFrom* instead. + bool + TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { + if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) + return false; + if (C->isExplicitSpecialization()) + return true; // we are only interested in explicit instantiations. 
+ auto *Declaration = + cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); + foldExplicitTemplateInstantiation( + Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), + Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); + return true; + } + + bool WalkUpFromTemplateDecl(TemplateDecl *S) { + foldTemplateDeclaration( + Builder.getDeclarationRange(S), + Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), + Builder.getDeclarationRange(S->getTemplatedDecl()), S); return true; } bool WalkUpFromTagDecl(TagDecl *C) { // FIXME: build the ClassSpecifier node. - if (C->isFreeStanding()) { - // Class is a declaration specifier and needs a spanning declaration node. - Builder.foldNode(Builder.getRange(C), - new (allocator()) syntax::SimpleDeclaration); + if (!C->isFreeStanding()) { + assert(C->getNumTemplateParameterLists() == 0); return true; } + handleFreeStandingTagDecl(C); return true; } + syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { + assert(C->isFreeStanding()); + // Class is a declaration specifier and needs a spanning declaration node. + auto DeclarationRange = Builder.getDeclarationRange(C); + syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; + Builder.foldNode(DeclarationRange, Result, nullptr); + + // Build TemplateDeclaration nodes if we had template parameters. + auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { + const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); + auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); + Result = + foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); + DeclarationRange = R; + }; + if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) + ConsumeTemplateParameters(*S->getTemplateParameters()); + for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) + ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); + return Result; + } + bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { - // (!) we do not want to call VisitDecl(), the declaration for translation + // We do not want to call VisitDecl(), the declaration for translation // unit is built by finalize(). return true; } @@ -370,14 +645,14 @@ public: Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CompoundStatement); + new (allocator()) syntax::CompoundStatement, S); return true; } // Some statements are not yet handled by syntax trees. bool WalkUpFromStmt(Stmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::UnknownStatement); + new (allocator()) syntax::UnknownStatement, S); return true; } @@ -386,27 +661,28 @@ public: // RAV traverses it as a statement, we produce invalid node kinds in that // case. // FIXME: should do this in RAV instead? 
- if (S->getInit() && !TraverseStmt(S->getInit())) - return false; - if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) - return false; - if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) - return false; - if (S->getBody() && !TraverseStmt(S->getBody())) - return false; - return true; + bool Result = [&, this]() { + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + }(); + WalkUpFromCXXForRangeStmt(S); + return Result; } bool TraverseStmt(Stmt *S) { if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { // We want to consume the semicolon, make sure SimpleDeclaration does not. for (auto *D : DS->decls()) - Builder.noticeDeclaratorWithoutSemicolon(D); + Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { - // (!) do not recurse into subexpressions. - // we do not have syntax trees for expressions yet, so we only want to see - // the first top-level expression. - return WalkUpFromExpr(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -415,19 +691,306 @@ public: bool WalkUpFromExpr(Expr *E) { assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); Builder.foldNode(Builder.getExprRange(E), - new (allocator()) syntax::UnknownExpression); + new (allocator()) syntax::UnknownExpression, E); + return true; + } + + syntax::NestedNameSpecifier * + BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) { + if (!QualifierLoc) + return nullptr; + for (auto it = QualifierLoc; it; it = it.getPrefix()) { + auto *NS = new (allocator()) syntax::NameSpecifier; + Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr); + Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); + } + auto *NNS = new (allocator()) syntax::NestedNameSpecifier; + Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, + nullptr); + return NNS; + } + + bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { + // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node + // referencing the location of the UDL suffix (`_w` in `1.2_w`). The + // UDL suffix location does not point to the beginning of a token, so we + // can't represent the UDL suffix as a separate syntax tree node. + + return WalkUpFromUserDefinedLiteral(S); + } + + syntax::UserDefinedLiteralExpression * + buildUserDefinedLiteral(UserDefinedLiteral *S) { + switch (S->getLiteralOperatorKind()) { + case clang::UserDefinedLiteral::LOK_Integer: + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Floating: + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Character: + return new (allocator()) syntax::CharUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_String: + return new (allocator()) syntax::StringUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Raw: + case clang::UserDefinedLiteral::LOK_Template: + // For raw literal operator and numeric literal operator template we + // cannot get the type of the operand in the semantic AST. We get this + // information from the token. 
As integer and floating point have the same + // token kind, we run `NumericLiteralParser` again to distinguish them. + auto TokLoc = S->getBeginLoc(); + auto TokSpelling = + Builder.findToken(TokLoc)->text(Context.getSourceManager()); + auto Literal = + NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), + Context.getLangOpts(), Context.getTargetInfo(), + Context.getDiagnostics()); + if (Literal.isIntegerLiteral()) + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + else { + assert(Literal.isFloatingLiteral()); + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + } + } + llvm_unreachable("Unknown literal operator kind."); + } + + bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); + return true; + } + + bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { + if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) + Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); + + auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; + // Get `UnqualifiedId` from `DeclRefExpr`. + // FIXME: Extract this logic so that it can be used by `MemberExpr`, + // and other semantic constructs, now it is tied to `DeclRefExpr`. + if (!S->hasExplicitTemplateArgs()) { + Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()), + unqualifiedId, nullptr); + } else { + auto templateIdSourceRange = + SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc()); + Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId, + nullptr); + } + Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); + + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IdExpression, S); + return true; + } + + bool WalkUpFromParenExpr(ParenExpr *S) { + Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::ParenExpression_subExpression); + Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::ParenExpression, S); + return true; + } + + bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IntegerLiteralExpression, S); + return true; + } + + bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CharacterLiteralExpression, S); + return true; + } + + bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::FloatingLiteralExpression, S); + return true; + } + + bool WalkUpFromStringLiteral(StringLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::StringLiteralExpression, S); + return true; + } + + bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BoolLiteralExpression, S); + return true; + } + + bool 
WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CxxNullPtrExpression, S); + return true; + } + + bool WalkUpFromUnaryOperator(UnaryOperator *S) { + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::UnaryOperatorExpression_operand); + + if (S->isPostfix()) + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + else + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + + return true; + } + + bool WalkUpFromBinaryOperator(BinaryOperator *S) { + Builder.markExprChild( + S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); return true; } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + if (getOperatorNodeKind(*S) == + syntax::NodeKind::PostfixUnaryOperatorExpression) { + // A postfix unary operator is declared as taking two operands. The + // second operand is used to distinguish from its prefix counterpart. In + // the semantic AST this "phantom" operand is represented as a + // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this + // operand because it does not correspond to anything written in source + // code + for (auto *child : S->children()) { + if (child->getSourceRange().isInvalid()) + continue; + if (!TraverseStmt(child)) + return false; + } + return WalkUpFromCXXOperatorCallExpr(S); + } else + return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); + } + + bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + switch (getOperatorNodeKind(*S)) { + case syntax::NodeKind::BinaryOperatorExpression: + Builder.markExprChild( + S->getArg(0), + syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getArg(1), + syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); + return true; + case syntax::NodeKind::PrefixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::PostfixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::UnknownExpression: + return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); + default: + llvm_unreachable("getOperatorNodeKind() does not return this value"); + } + } + 
bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { - auto Tokens = Builder.getRange(S); + auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { // Handle nested namespace definitions. Those start at '::' token, e.g. // namespace a^::b {} // FIXME: build corresponding nodes for the name of this namespace. return true; } - Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); + Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); + return true; + } + + bool TraverseParenTypeLoc(ParenTypeLoc L) { + // We reverse order of traversal to get the proper syntax structure. + if (!WalkUpFromParenTypeLoc(L)) + return false; + return TraverseTypeLoc(L.getInnerLoc()); + } + + bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), + new (allocator()) syntax::ParenDeclarator, L); + return true; + } + + // Declarator chunks, they are produced by type locs and some clang::Decls. + bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { + Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); + Builder.markExprChild(L.getSizeExpr(), + syntax::NodeRole::ArraySubscript_sizeExpression); + Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), + new (allocator()) syntax::ArraySubscript, L); + return true; + } + + bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + for (auto *P : L.getParams()) { + Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); + } + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), + new (allocator()) syntax::ParametersAndQualifiers, L); + return true; + } + + bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { + if (!L.getTypePtr()->hasTrailingReturn()) + return WalkUpFromFunctionTypeLoc(L); + + auto *TrailingReturnTokens = BuildTrailingReturn(L); + // Finish building the node for parameters. + Builder.markChild(TrailingReturnTokens, + syntax::NodeRole::ParametersAndQualifiers_trailingReturn); + return WalkUpFromFunctionTypeLoc(L); + } + + bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { + auto SR = L.getLocalSourceRange(); + Builder.foldNode(Builder.getRange(SR), + new (allocator()) syntax::MemberPointer, L); return true; } @@ -436,13 +999,13 @@ public: // and fold resulting nodes. 
bool WalkUpFromDeclStmt(DeclStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DeclarationStatement); + new (allocator()) syntax::DeclarationStatement, S); return true; } bool WalkUpFromNullStmt(NullStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::EmptyStatement); + new (allocator()) syntax::EmptyStatement, S); return true; } @@ -451,7 +1014,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::SwitchStatement); + new (allocator()) syntax::SwitchStatement, S); return true; } @@ -461,7 +1024,7 @@ public: Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CaseStatement); + new (allocator()) syntax::CaseStatement, S); return true; } @@ -470,7 +1033,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DefaultStatement); + new (allocator()) syntax::DefaultStatement, S); return true; } @@ -483,7 +1046,7 @@ public: Builder.markStmtChild(S->getElse(), syntax::NodeRole::IfStatement_elseStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::IfStatement); + new (allocator()) syntax::IfStatement, S); return true; } @@ -491,7 +1054,7 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ForStatement); + new (allocator()) syntax::ForStatement, S); return true; } @@ -500,7 +1063,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::WhileStatement); + new (allocator()) syntax::WhileStatement, S); return true; } @@ -508,7 +1071,7 @@ public: Builder.markChildToken(S->getContinueLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ContinueStatement); + new (allocator()) syntax::ContinueStatement, S); return true; } @@ -516,7 +1079,7 @@ public: Builder.markChildToken(S->getBreakLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::BreakStatement); + new (allocator()) syntax::BreakStatement, S); return true; } @@ -526,7 +1089,7 @@ public: Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnStatement_value); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ReturnStatement); + new (allocator()) syntax::ReturnStatement, S); return true; } @@ -534,13 +1097,13 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::RangeBasedForStatement); + new (allocator()) syntax::RangeBasedForStatement, S); return true; } bool WalkUpFromEmptyDecl(EmptyDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::EmptyDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::EmptyDeclaration, S); return true; } @@ -549,76 
+1112,175 @@ public: syntax::NodeRole::StaticAssertDeclaration_condition); Builder.markExprChild(S->getMessage(), syntax::NodeRole::StaticAssertDeclaration_message); - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::StaticAssertDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::StaticAssertDeclaration, S); return true; } bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::LinkageSpecificationDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::LinkageSpecificationDeclaration, + S); return true; } bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::NamespaceAliasDefinition); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::NamespaceAliasDefinition, S); return true; } bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingNamespaceDirective); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingNamespaceDirective, S); return true; } bool WalkUpFromUsingDecl(UsingDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::TypeAliasDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::TypeAliasDeclaration, S); return true; } private: + template <class T> SourceLocation getQualifiedNameStart(T *D) { + static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + auto DN = D->getDeclName(); + bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); + if (IsAnonymous) + return SourceLocation(); + + if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { + if (DD->getQualifierLoc()) { + return DD->getQualifierLoc().getBeginLoc(); + } + } + + return D->getLocation(); + } + + SourceRange getInitializerRange(Decl *D) { + if (auto *V = llvm::dyn_cast<VarDecl>(D)) { + auto *I = V->getInit(); + // Initializers in range-based-for are not part of the declarator + if (I && !V->isCXXForRangeDecl()) + return I->getSourceRange(); + } + + return SourceRange(); + } + + /// Folds SimpleDeclarator node (if present) and in case this is the last + /// declarator in the chain it also folds SimpleDeclaration node. 
+ template <class T> bool processDeclaratorAndDeclaration(T *D) { + SourceRange Initializer = getInitializerRange(D); + auto Range = getDeclaratorRange(Builder.sourceManager(), + D->getTypeSourceInfo()->getTypeLoc(), + getQualifiedNameStart(D), Initializer); + + // There doesn't have to be a declarator (e.g. `void foo(int)` only has + // declaration, but no declarator). + if (Range.getBegin().isValid()) { + auto *N = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(Range), N, nullptr); + Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); + } + + if (Builder.isResponsibleForCreatingDeclaration(D)) { + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::SimpleDeclaration, D); + } + return true; + } + + /// Returns the range of the built node. + syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { + assert(L.getTypePtr()->hasTrailingReturn()); + + auto ReturnedType = L.getReturnLoc(); + // Build node for the declarator, if any. + auto ReturnDeclaratorRange = + getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, + /*Name=*/SourceLocation(), + /*Initializer=*/SourceLocation()); + syntax::SimpleDeclarator *ReturnDeclarator = nullptr; + if (ReturnDeclaratorRange.isValid()) { + ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), + ReturnDeclarator, nullptr); + } + + // Build node for trailing return type. + auto Return = Builder.getRange(ReturnedType.getSourceRange()); + const auto *Arrow = Return.begin() - 1; + assert(Arrow->kind() == tok::arrow); + auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); + Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); + if (ReturnDeclarator) + Builder.markChild(ReturnDeclarator, + syntax::NodeRole::TrailingReturnType_declarator); + auto *R = new (allocator()) syntax::TrailingReturnType; + Builder.foldNode(Tokens, R, L); + return R; + } + + void foldExplicitTemplateInstantiation( + ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, + const syntax::Token *TemplateKW, + syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { + assert(!ExternKW || ExternKW->kind() == tok::kw_extern); + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + Builder.markChild( + InnerDeclaration, + syntax::NodeRole::ExplicitTemplateInstantiation_declaration); + Builder.foldNode( + Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); + } + + syntax::TemplateDeclaration *foldTemplateDeclaration( + ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, + ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + + auto *N = new (allocator()) syntax::TemplateDeclaration; + Builder.foldNode(Range, N, From); + Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); + return N; + } + /// A small helper to save some typing. 
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } syntax::TreeBuilder &Builder; - const LangOptions &LangOpts; + const ASTContext &Context; }; } // namespace -void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, - syntax::Tree *New) { - Pending.foldChildren(Arena, Range, New); -} - -void syntax::TreeBuilder::noticeDeclaratorRange( - llvm::ArrayRef<syntax::Token> Range) { - if (Pending.extendDelayedFold(Range)) - return; - Pending.foldChildrenDelayed(Range, - new (allocator()) syntax::SimpleDeclaration); -} - -void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { +void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { DeclsWithoutSemicolons.insert(D); } @@ -628,31 +1290,55 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { + if (!T) + return; + Pending.assignRole(*T, R); +} + +void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { + assert(N); + setRole(N, R); +} + +void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { + auto *SN = Mapping.find(N); + assert(SN != nullptr); + setRole(SN, R); +} + void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { if (!Child) return; - auto Range = getStmtRange(Child); - // This is an expression in a statement position, consume the trailing - // semicolon and form an 'ExpressionStatement' node. - if (auto *E = dyn_cast<Expr>(Child)) { - Pending.assignRole(getExprRange(E), - NodeRole::ExpressionStatement_expression); - // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. - Pending.foldChildren(Arena, Range, - new (allocator()) syntax::ExpressionStatement); - } - Pending.assignRole(Range, Role); + syntax::Tree *ChildNode; + if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); + ChildNode = new (allocator()) syntax::ExpressionStatement; + // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 
+ Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + } else { + ChildNode = Mapping.find(Child); + } + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; + Child = Child->IgnoreImplicit(); - Pending.assignRole(getExprRange(Child), Role); + syntax::Tree *ChildNode = Mapping.find(Child); + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { + if (L.isInvalid()) + return nullptr; auto It = LocationToToken.find(L.getRawEncoding()); assert(It != LocationToToken.end()); return It->second; diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp index 72458528202e5..24048b297a112 100644 --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -35,7 +35,7 @@ public: assert(!New->isDetached()); assert(Role != NodeRole::Detached); - New->Role = static_cast<unsigned>(Role); + New->setRole(Role); auto *P = Anchor->parent(); P->replaceChildRangeLowLevel(Anchor, Anchor, New); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 5b0c5107c134c..2435ae0a91dd6 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,6 +18,38 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "TranslationUnit"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; + case NodeKind::ParenExpression: + return OS << "ParenExpression"; + case NodeKind::IntegerLiteralExpression: + return OS << "IntegerLiteralExpression"; + case NodeKind::CharacterLiteralExpression: + return OS << "CharacterLiteralExpression"; + case NodeKind::FloatingLiteralExpression: + return OS << "FloatingLiteralExpression"; + case NodeKind::StringLiteralExpression: + return OS << "StringLiteralExpression"; + case NodeKind::BoolLiteralExpression: + return OS << "BoolLiteralExpression"; + case NodeKind::CxxNullPtrExpression: + return OS << "CxxNullPtrExpression"; + case NodeKind::IntegerUserDefinedLiteralExpression: + return OS << "IntegerUserDefinedLiteralExpression"; + case NodeKind::FloatUserDefinedLiteralExpression: + return OS << "FloatUserDefinedLiteralExpression"; + case NodeKind::CharUserDefinedLiteralExpression: + return OS << "CharUserDefinedLiteralExpression"; + case NodeKind::StringUserDefinedLiteralExpression: + return OS << "StringUserDefinedLiteralExpression"; + case NodeKind::PrefixUnaryOperatorExpression: + return OS << "PrefixUnaryOperatorExpression"; + case NodeKind::PostfixUnaryOperatorExpression: + return OS << "PostfixUnaryOperatorExpression"; + case NodeKind::BinaryOperatorExpression: + return OS << "BinaryOperatorExpression"; + case NodeKind::UnqualifiedId: + return OS << "UnqualifiedId"; + case NodeKind::IdExpression: + return OS << "IdExpression"; case NodeKind::UnknownStatement: return OS << "UnknownStatement"; case NodeKind::DeclarationStatement: @@ -58,6 +90,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "LinkageSpecificationDeclaration"; case NodeKind::SimpleDeclaration: return OS << "SimpleDeclaration"; + case NodeKind::TemplateDeclaration: + return OS << "TemplateDeclaration"; + case NodeKind::ExplicitTemplateInstantiation: + return OS << "ExplicitTemplateInstantiation"; case NodeKind::NamespaceDefinition: return OS << "NamespaceDefinition"; case NodeKind::NamespaceAliasDefinition: @@ -68,6 
+104,22 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "UsingDeclaration"; case NodeKind::TypeAliasDeclaration: return OS << "TypeAliasDeclaration"; + case NodeKind::SimpleDeclarator: + return OS << "SimpleDeclarator"; + case NodeKind::ParenDeclarator: + return OS << "ParenDeclarator"; + case NodeKind::ArraySubscript: + return OS << "ArraySubscript"; + case NodeKind::TrailingReturnType: + return OS << "TrailingReturnType"; + case NodeKind::ParametersAndQualifiers: + return OS << "ParametersAndQualifiers"; + case NodeKind::MemberPointer: + return OS << "MemberPointer"; + case NodeKind::NameSpecifier: + return OS << "NameSpecifier"; + case NodeKind::NestedNameSpecifier: + return OS << "NestedNameSpecifier"; } llvm_unreachable("unknown node kind"); } @@ -84,6 +136,12 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "CloseParen"; case syntax::NodeRole::IntroducerKeyword: return OS << "IntroducerKeyword"; + case syntax::NodeRole::LiteralToken: + return OS << "LiteralToken"; + case syntax::NodeRole::ArrowToken: + return OS << "ArrowToken"; + case syntax::NodeRole::ExternKeyword: + return OS << "ExternKeyword"; case syntax::NodeRole::BodyStatement: return OS << "BodyStatement"; case syntax::NodeRole::CaseStatement_value: @@ -94,6 +152,14 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "IfStatement_elseKeyword"; case syntax::NodeRole::IfStatement_elseStatement: return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::OperatorExpression_operatorToken: + return OS << "OperatorExpression_operatorToken"; + case syntax::NodeRole::UnaryOperatorExpression_operand: + return OS << "UnaryOperatorExpression_operand"; + case syntax::NodeRole::BinaryOperatorExpression_leftHandSide: + return OS << "BinaryOperatorExpression_leftHandSide"; + case syntax::NodeRole::BinaryOperatorExpression_rightHandSide: + return OS << "BinaryOperatorExpression_rightHandSide"; case syntax::NodeRole::ReturnStatement_value: return OS << "ReturnStatement_value"; case syntax::NodeRole::ExpressionStatement_expression: @@ -104,10 +170,126 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "StaticAssertDeclaration_condition"; case syntax::NodeRole::StaticAssertDeclaration_message: return OS << "StaticAssertDeclaration_message"; + case syntax::NodeRole::SimpleDeclaration_declarator: + return OS << "SimpleDeclaration_declarator"; + case syntax::NodeRole::TemplateDeclaration_declaration: + return OS << "TemplateDeclaration_declaration"; + case syntax::NodeRole::ExplicitTemplateInstantiation_declaration: + return OS << "ExplicitTemplateInstantiation_declaration"; + case syntax::NodeRole::ArraySubscript_sizeExpression: + return OS << "ArraySubscript_sizeExpression"; + case syntax::NodeRole::TrailingReturnType_declarator: + return OS << "TrailingReturnType_declarator"; + case syntax::NodeRole::ParametersAndQualifiers_parameter: + return OS << "ParametersAndQualifiers_parameter"; + case syntax::NodeRole::ParametersAndQualifiers_trailingReturn: + return OS << "ParametersAndQualifiers_trailingReturn"; + case syntax::NodeRole::IdExpression_id: + return OS << "IdExpression_id"; + case syntax::NodeRole::IdExpression_qualifier: + return OS << "IdExpression_qualifier"; + case syntax::NodeRole::NestedNameSpecifier_specifier: + return OS << "NestedNameSpecifier_specifier"; + case syntax::NodeRole::ParenExpression_subExpression: + return OS << "ParenExpression_subExpression"; } 
llvm_unreachable("invalid role"); } +std::vector<syntax::NameSpecifier *> syntax::NestedNameSpecifier::specifiers() { + std::vector<syntax::NameSpecifier *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + assert(C->role() == syntax::NodeRole::NestedNameSpecifier_specifier); + Children.push_back(llvm::cast<syntax::NameSpecifier>(C)); + } + return Children; +} + +syntax::NestedNameSpecifier *syntax::IdExpression::qualifier() { + return llvm::cast_or_null<syntax::NestedNameSpecifier>( + findChild(syntax::NodeRole::IdExpression_qualifier)); +} + +syntax::UnqualifiedId *syntax::IdExpression::unqualifiedId() { + return llvm::cast_or_null<syntax::UnqualifiedId>( + findChild(syntax::NodeRole::IdExpression_id)); +} + +syntax::Leaf *syntax::ParenExpression::openParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ParenExpression::subExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ParenExpression_subExpression)); +} + +syntax::Leaf *syntax::ParenExpression::closeParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::IntegerLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CharacterLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::FloatingLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::StringLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::BoolLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::lhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); +} + +syntax::Leaf *syntax::UnaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::UnaryOperatorExpression::operand() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::UnaryOperatorExpression_operand)); +} + +syntax::Leaf *syntax::BinaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::rhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_rightHandSide)); +} + syntax::Leaf *syntax::SwitchStatement::switchKeyword() { return llvm::cast_or_null<syntax::Leaf>( findChild(syntax::NodeRole::IntroducerKeyword)); @@ -226,8 +408,8 @@ syntax::Leaf *syntax::CompoundStatement::lbrace() { std::vector<syntax::Statement *> syntax::CompoundStatement::statements() { 
std::vector<syntax::Statement *> Children; for (auto *C = firstChild(); C; C = C->nextSibling()) { - if (C->role() == syntax::NodeRole::CompoundStatement_statement) - Children.push_back(llvm::cast<syntax::Statement>(C)); + assert(C->role() == syntax::NodeRole::CompoundStatement_statement); + Children.push_back(llvm::cast<syntax::Statement>(C)); } return Children; } @@ -246,3 +428,98 @@ syntax::Expression *syntax::StaticAssertDeclaration::message() { return llvm::cast_or_null<syntax::Expression>( findChild(syntax::NodeRole::StaticAssertDeclaration_message)); } + +std::vector<syntax::SimpleDeclarator *> +syntax::SimpleDeclaration::declarators() { + std::vector<syntax::SimpleDeclarator *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::SimpleDeclaration_declarator) + Children.push_back(llvm::cast<syntax::SimpleDeclarator>(C)); + } + return Children; +} + +syntax::Leaf *syntax::TemplateDeclaration::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Declaration *syntax::TemplateDeclaration::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::TemplateDeclaration_declaration)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::externKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ExternKeyword)); +} + +syntax::Declaration *syntax::ExplicitTemplateInstantiation::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::ExplicitTemplateInstantiation_declaration)); +} + +syntax::Leaf *syntax::ParenDeclarator::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Leaf *syntax::ParenDeclarator::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::ArraySubscript::lbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ArraySubscript::sizeExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ArraySubscript_sizeExpression)); +} + +syntax::Leaf *syntax::ArraySubscript::rbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::TrailingReturnType::arrowToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ArrowToken)); +} + +syntax::SimpleDeclarator *syntax::TrailingReturnType::declarator() { + return llvm::cast_or_null<syntax::SimpleDeclarator>( + findChild(syntax::NodeRole::TrailingReturnType_declarator)); +} + +syntax::Leaf *syntax::ParametersAndQualifiers::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector<syntax::SimpleDeclaration *> +syntax::ParametersAndQualifiers::parameters() { + std::vector<syntax::SimpleDeclaration *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::ParametersAndQualifiers_parameter) + Children.push_back(llvm::cast<syntax::SimpleDeclaration>(C)); + } + return Children; +} + +syntax::Leaf *syntax::ParametersAndQualifiers::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + 
findChild(syntax::NodeRole::CloseParen)); +} + +syntax::TrailingReturnType *syntax::ParametersAndQualifiers::trailingReturn() { + return llvm::cast_or_null<syntax::TrailingReturnType>( + findChild(syntax::NodeRole::ParametersAndQualifiers_trailingReturn)); +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 3df1c064923a1..c6b904822b8b5 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -35,6 +35,69 @@ using namespace clang; using namespace clang::syntax; +namespace { +// Finds the smallest consecutive subsuquence of Toks that covers R. +llvm::ArrayRef<syntax::Token> +getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, + const SourceManager &SM) { + if (R.isInvalid()) + return {}; + const syntax::Token *Begin = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const syntax::Token *End = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + +// Finds the smallest expansion range that contains expanded tokens First and +// Last, e.g.: +// #define ID(x) x +// ID(ID(ID(a1) a2)) +// ~~ -> a1 +// ~~ -> a2 +// ~~~~~~~~~ -> a1 a2 +SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, + const syntax::Token &Last, + const SourceManager &SM) { + SourceRange Res; + auto FirstLoc = First.location(), LastLoc = Last.location(); + // Keep traversing up the spelling chain as longs as tokens are part of the + // same expansion. + while (!FirstLoc.isFileID() && !LastLoc.isFileID()) { + auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); + auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); + // Stop if expansions have diverged. + if (ExpInfoFirst.getExpansionLocStart() != + ExpInfoLast.getExpansionLocStart()) + break; + // Do not continue into macro bodies. + if (!ExpInfoFirst.isMacroArgExpansion() || + !ExpInfoLast.isMacroArgExpansion()) + break; + FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); + LastLoc = SM.getImmediateSpellingLoc(LastLoc); + // Update the result afterwards, as we want the tokens that triggered the + // expansion. + Res = {FirstLoc, LastLoc}; + } + // Normally mapping back to expansion location here only changes FileID, as + // we've already found some tokens expanded from the same macro argument, and + // they should map to a consecutive subset of spelled tokens. Unfortunately + // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations + // based on their FileID in addition to offsets. So even though we are + // referring to same tokens, SourceManager might tell us that one is before + // the other if they've got different FileIDs. 
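getTokensCovering above relies on llvm::partition_point doing two binary searches over tokens sorted by source location. Below is the same idiom on plain integer offsets as a self-contained sketch; the function name and the use of raw offsets are illustrative assumptions, not part of this patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Smallest consecutive subsequence of Offsets (sorted ascending) that covers
// the closed range [Begin, End], mirroring getTokensCovering.
llvm::ArrayRef<unsigned> coveringRange(llvm::ArrayRef<unsigned> Offsets,
                                       unsigned Begin, unsigned End) {
  const unsigned *First =
      llvm::partition_point(Offsets, [&](unsigned O) { return O < Begin; });
  const unsigned *Last =
      llvm::partition_point(Offsets, [&](unsigned O) { return O <= End; });
  if (First > Last)
    return {};
  return {First, Last};
}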
+ return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); +} + +} // namespace + syntax::Token::Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind) : Location(Location), Length(Length), Kind(Kind) { @@ -67,7 +130,8 @@ FileRange syntax::Token::range(const SourceManager &SM, auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens"); + assert((F == L || F.endOffset() <= L.beginOffset()) && + "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -120,19 +184,7 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { } llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { - if (R.isInvalid()) - return {}; - const Token *Begin = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); - }); - const Token *End = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); - }); - if (Begin > End) - return {}; - return {Begin, End}; + return getTokensCovering(expandedTokens(), R, *SourceMgr); } CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { @@ -161,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr}; + return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], + /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. // Check if the token is part of the mapping. if (ExpandedIndex < It->EndExpanded) - return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It}; + return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. return { &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping*/ nullptr}; + /*Mapping=*/nullptr}; +} + +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { + if (Spelled.empty()) + return {}; + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + const MarkedFile &File = It->second; + // `Spelled` must be a subrange of `File.SpelledTokens`. 
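The index arithmetic in spelledForExpandedToken above (and the bounds logic of expandedForSpelled that starts here) is easiest to see on a concrete mapping. The numbers below are a hypothetical illustration, not taken from a real token buffer.

// One file, ten spelled tokens, one mapping
//   M = {BeginSpelled = 2, EndSpelled = 6, BeginExpanded = 2, EndExpanded = 3}
// i.e. spelled tokens [2, 6) were a macro call producing a single expanded
// token at index 2 (assume File.BeginExpanded == 0).
//   ExpandedIndex = 1: before M, no offset change -> SpelledTokens[1]
//   ExpandedIndex = 2: inside M -> SpelledTokens[M.BeginSpelled] == SpelledTokens[2]
//   ExpandedIndex = 4: after M  -> SpelledTokens[M.EndSpelled + (4 - M.EndExpanded)]
//                               == SpelledTokens[6 + 1] == SpelledTokens[7]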
+ assert(File.SpelledTokens.data() <= Spelled.data()); + assert(&Spelled.back() <= + File.SpelledTokens.data() + File.SpelledTokens.size()); +#ifndef NDEBUG + auto T1 = Spelled.back().location(); + auto T2 = File.SpelledTokens.back().location(); + assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); +#endif + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + assert(SpelledFrontI < File.SpelledTokens.size()); + unsigned ExpandedBegin; + if (!FrontMapping) { + // No mapping that starts before the first token of Spelled, we don't have + // to modify offsets. + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledFrontI != FrontMapping->BeginSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + // Spelled tokens start at the beginning of this mapping. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + // Spelled tokens start after the mapping ends (they start in the hole + // between 2 mappings, or between a mapping and end of the file). + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + // No mapping that starts before the last token of Spelled, we don't have to + // modify offsets. + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledBackI + 1 != BackMapping->EndSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + ExpandedEnd = BackMapping->EndExpanded; + } else { + // Spelled tokens end after the mapping ends. + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. 
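A hedged usage sketch for expandedForSpelled; Buffer, FID and the choice of sub-range are assumptions made for illustration only.

// Hypothetical caller asking "what did these file tokens expand to?".
llvm::ArrayRef<syntax::Token> AllSpelled = Buffer.spelledTokens(FID);
llvm::ArrayRef<syntax::Token> Sub = AllSpelled.take_front(3); // any contiguous slice
for (llvm::ArrayRef<syntax::Token> Run : Buffer.expandedForSpelled(Sub)) {
  // Each Run is a contiguous slice of Buffer.expandedTokens(); per the FIXMEs
  // above, a slice covering only part of a macro call yields an empty result.
  for (const syntax::Token &T : Run)
    llvm::errs() << T.text(Buffer.sourceManager()) << " ";
}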
+ if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { @@ -182,9 +324,20 @@ llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { return It->second.SpelledTokens; } +const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const { + assert(Loc.isFileID()); + const auto *Tok = llvm::partition_point( + spelledTokens(SourceMgr->getFileID(Loc)), + [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + if (!Tok || Tok->location() != Loc) + return nullptr; + return Tok; +} + std::string TokenBuffer::Mapping::str() const { - return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", - BeginSpelled, EndSpelled, BeginExpanded, EndExpanded); + return std::string( + llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", + BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); } llvm::Optional<llvm::ArrayRef<syntax::Token>> @@ -194,8 +347,6 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { if (Expanded.empty()) return llvm::None; - // FIXME: also allow changes uniquely mapping to macro arguments. - const syntax::Token *BeginSpelled; const Mapping *BeginMapping; std::tie(BeginSpelled, BeginMapping) = @@ -213,12 +364,28 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { const MarkedFile &File = Files.find(FID)->second; - // Do not allow changes that cross macro expansion boundaries. + // If both tokens are coming from a macro argument expansion, try and map to + // smallest part of the macro argument. BeginMapping && LastMapping check is + // only for performance, they are a prerequisite for Expanded.front() and + // Expanded.back() being part of a macro arg expansion. + if (BeginMapping && LastMapping && + SourceMgr->isMacroArgExpansion(Expanded.front().location()) && + SourceMgr->isMacroArgExpansion(Expanded.back().location())) { + auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), + Expanded.back(), *SourceMgr); + // It might be the case that tokens are arguments of different macro calls, + // in that case we should continue with the logic below instead of returning + // an empty range. + if (CommonRange.isValid()) + return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); + } + + // Do not allow changes that doesn't cover full expansion. unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); - if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded) + if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded) return llvm::None; - if (LastMapping && EndExpanded < LastMapping->EndExpanded) + if (LastMapping && LastMapping->EndExpanded != EndExpanded) return llvm::None; // All is good, return the result. 
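spelledForExpanded is the opposite direction, typically used by editing clients to decide whether a change computed on expanded tokens can be applied to the file. A minimal sketch follows; Buffer and SomeRange are assumed to exist and are not part of this patch.

// Hypothetical caller mapping a range of expanded tokens back to file tokens.
llvm::ArrayRef<syntax::Token> Expanded = Buffer.expandedTokens(SomeRange);
if (llvm::Optional<llvm::ArrayRef<syntax::Token>> Spelled =
        Buffer.spelledForExpanded(Expanded)) {
  // Safe to rewrite the text covered by Spelled->front() .. Spelled->back().
  // With this patch, a range lying entirely inside one macro argument also
  // maps back, to the spelled tokens of that argument.
} else {
  // The range covers only part of a macro expansion; no file edit is possible.
}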
return llvm::makeArrayRef( @@ -253,24 +420,30 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { ExpandedTokens.data() + M->EndExpanded); return E; } - llvm::ArrayRef<syntax::Token> syntax::spelledTokensTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { assert(Loc.isFileID()); - llvm::ArrayRef<syntax::Token> All = - Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)); + auto *Right = llvm::partition_point( - All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); - bool AcceptRight = Right != All.end() && Right->location() <= Loc; - bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc; + Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; + bool AcceptLeft = + Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), Right + (AcceptRight ? 1 : 0)); } +llvm::ArrayRef<syntax::Token> +syntax::spelledTokensTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledTokensTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + const syntax::Token * syntax::spelledIdentifierTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { if (Tok.kind() == tok::identifier) return &Tok; @@ -278,6 +451,13 @@ syntax::spelledIdentifierTouching(SourceLocation Loc, return nullptr; } +const syntax::Token * +syntax::spelledIdentifierTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledIdentifierTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + std::vector<const syntax::Token *> TokenBuffer::macroExpansions(FileID FID) const { auto FileIt = Files.find(FID); @@ -293,7 +473,8 @@ TokenBuffer::macroExpansions(FileID FID) const { return Expansions; } -std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, +std::vector<syntax::Token> syntax::tokenize(const FileRange &FR, + const SourceManager &SM, const LangOptions &LO) { std::vector<syntax::Token> Tokens; IdentifierTable Identifiers(LO); @@ -308,18 +489,28 @@ std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, Tokens.push_back(syntax::Token(T)); }; - Lexer L(FID, SM.getBuffer(FID), SM, LO); + auto SrcBuffer = SM.getBufferData(FR.file()); + Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(), + SrcBuffer.data() + FR.beginOffset(), + // We can't make BufEnd point to FR.endOffset, as Lexer requires a + // null terminated buffer. + SrcBuffer.data() + SrcBuffer.size()); clang::Token T; - while (!L.LexFromRawLexer(T)) + while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset()) AddToken(T); - // 'eof' is only the last token if the input is null-terminated. Never store - // it, for consistency. - if (T.getKind() != tok::eof) + // LexFromRawLexer returns true when it parses the last token of the file, add + // it iff it starts within the range we are interested in. 
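The new FileRange overload of tokenize lexes only a slice of a file. A short usage sketch; FID, BeginOffset and EndOffset are placeholders the caller is assumed to know.

// Hypothetical caller lexing part of a file into raw syntax::Tokens.
syntax::FileRange Range(FID, BeginOffset, EndOffset);
std::vector<syntax::Token> Slice = syntax::tokenize(Range, SM, LangOpts);
// The FileID overload below is now a thin wrapper covering the whole file:
std::vector<syntax::Token> Whole = syntax::tokenize(FID, SM, LangOpts);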
+ if (SM.getFileOffset(T.getLocation()) < FR.endOffset()) AddToken(T); return Tokens; } +std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, + const LangOptions &LO) { + return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO); +} + /// Records information reqired to construct mappings for the token buffer that /// we are collecting. class TokenCollector::CollectPPExpansions : public PPCallbacks { @@ -335,14 +526,38 @@ public: SourceRange Range, const MacroArgs *Args) override { if (!Collector) return; - // Only record top-level expansions, not those where: + const auto &SM = Collector->PP.getSourceManager(); + // Only record top-level expansions that directly produce expanded tokens. + // This excludes those where: // - the macro use is inside a macro body, // - the macro appears in an argument to another macro. - if (!MacroNameTok.getLocation().isFileID() || - (LastExpansionEnd.isValid() && - Collector->PP.getSourceManager().isBeforeInTranslationUnit( - Range.getBegin(), LastExpansionEnd))) + // However macro expansion isn't really a tree, it's token rewrite rules, + // so there are other cases, e.g. + // #define B(X) X + // #define A 1 + B + // A(2) + // Both A and B produce expanded tokens, though the macro name 'B' comes + // from an expansion. The best we can do is merge the mappings for both. + + // The *last* token of any top-level macro expansion must be in a file. + // (In the example above, see the closing paren of the expansion of B). + if (!Range.getEnd().isFileID()) + return; + // If there's a current expansion that encloses this one, this one can't be + // top-level. + if (LastExpansionEnd.isValid() && + !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) return; + + // If the macro invocation (B) starts in a macro (A) but ends in a file, + // we'll create a merged mapping for A + B by overwriting the endpoint for + // A's startpoint. + if (!Range.getBegin().isFileID()) { + Range.setBegin(SM.getExpansionLoc(Range.getBegin())); + assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) && + "Overlapping macros should have same expansion location"); + } + Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd(); LastExpansionEnd = Range.getEnd(); } @@ -399,197 +614,180 @@ public: } TokenBuffer build() && { - buildSpelledTokens(); - - // Walk over expanded tokens and spelled tokens in parallel, building the - // mappings between those using source locations. - // To correctly recover empty macro expansions, we also take locations - // reported to PPCallbacks::MacroExpands into account as we do not have any - // expanded tokens with source locations to guide us. - - // The 'eof' token is special, it is not part of spelled token stream. We - // handle it separately at the end. assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); - for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) { - // (!) I might be updated by the following call. - processExpandedToken(I); - } - // 'eof' not handled in the loop, do it here. - assert(SM.getMainFileID() == - SM.getFileID(Result.ExpandedTokens.back().location())); - fillGapUntil(Result.Files[SM.getMainFileID()], - Result.ExpandedTokens.back().location(), - Result.ExpandedTokens.size() - 1); - Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size(); + // Tokenize every file that contributed tokens to the expanded stream. 
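To make the merged-mapping rule in MacroExpands above concrete, here is roughly what the collector records for the example from its comment; the spelled-out map contents are an illustration, not output from a real run.

//   #define B(X) X
//   #define A 1 + B
//   A(2)
// MacroExpands fires first for A (begin and end at the file token `A`), then
// for B, whose name comes from A's expansion but whose `(2)` closes in the
// file. B's begin is rewritten to A's expansion location, so the two
// invocations share a single entry, roughly:
//   Expansions[location of `A` in "A(2)"] = location of `)` in "A(2)"
// discard()/advance() below consume this via CollectedExpansions.lookup().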
+ buildSpelledTokens(); - // Some files might have unaccounted spelled tokens at the end, add an empty - // mapping for those as they did not have expanded counterparts. - fillGapsAtEndOfFiles(); + // The expanded token stream consists of runs of tokens that came from + // the same source (a macro expansion, part of a file etc). + // Between these runs are the logical positions of spelled tokens that + // didn't expand to anything. + while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { + // Create empty mappings for spelled tokens that expanded to nothing here. + // May advance NextSpelled, but NextExpanded is unchanged. + discard(); + // Create mapping for a contiguous run of expanded tokens. + // Advances NextExpanded past the run, and NextSpelled accordingly. + unsigned OldPosition = NextExpanded; + advance(); + if (NextExpanded == OldPosition) + diagnoseAdvanceFailure(); + } + // If any tokens remain in any of the files, they didn't expand to anything. + // Create empty mappings up until the end of the file. + for (const auto &File : Result.Files) + discard(File.first); + +#ifndef NDEBUG + for (auto &pair : Result.Files) { + auto &mappings = pair.second.Mappings; + assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1, + const TokenBuffer::Mapping &M2) { + return M1.BeginSpelled < M2.BeginSpelled && + M1.EndSpelled < M2.EndSpelled && + M1.BeginExpanded < M2.BeginExpanded && + M1.EndExpanded < M2.EndExpanded; + })); + } +#endif return std::move(Result); } private: - /// Process the next token in an expanded stream and move corresponding - /// spelled tokens, record any mapping if needed. - /// (!) \p I will be updated if this had to skip tokens, e.g. for macros. - void processExpandedToken(unsigned &I) { - auto L = Result.ExpandedTokens[I].location(); - if (L.isMacroID()) { - processMacroExpansion(SM.getExpansionRange(L), I); - return; + // Consume a sequence of spelled tokens that didn't expand to anything. + // In the simplest case, skips spelled tokens until finding one that produced + // the NextExpanded token, and creates an empty mapping for them. + // If Drain is provided, skips remaining tokens from that file instead. + void discard(llvm::Optional<FileID> Drain = llvm::None) { + SourceLocation Target = + Drain ? SM.getLocForEndOfFile(*Drain) + : SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()); + FileID File = SM.getFileID(Target); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + TokenBuffer::Mapping Mapping; + Mapping.BeginSpelled = NextSpelled; + // When dropping trailing tokens from a file, the empty mapping should + // be positioned within the file's expanded-token range (at the end). + Mapping.BeginExpanded = Mapping.EndExpanded = + Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; + // We may want to split into several adjacent empty mappings. + // FlushMapping() emits the current mapping and starts a new one. + auto FlushMapping = [&, this] { + Mapping.EndSpelled = NextSpelled; + if (Mapping.BeginSpelled != Mapping.EndSpelled) + Result.Files[File].Mappings.push_back(Mapping); + Mapping.BeginSpelled = NextSpelled; + }; + + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() < Target) { + // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) + // then we want to partition our (empty) mapping. 
+ // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) + SourceLocation KnownEnd = CollectedExpansions.lookup( + SpelledTokens[NextSpelled].location().getRawEncoding()); + if (KnownEnd.isValid()) { + FlushMapping(); // Emits [Start, NextSpelled) + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= KnownEnd) + ++NextSpelled; + FlushMapping(); // Emits [NextSpelled, KnownEnd] + // Now the loop contitues and will emit (KnownEnd, Target). + } else { + ++NextSpelled; + } } - if (L.isFileID()) { - auto FID = SM.getFileID(L); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, L, I); + FlushMapping(); + } - // Skip the token. - assert(File.SpelledTokens[NextSpelled[FID]].location() == L && - "no corresponding token in the spelled stream"); - ++NextSpelled[FID]; - return; + // Consumes the NextExpanded token and others that are part of the same run. + // Increases NextExpanded and NextSpelled by at least one, and adds a mapping + // (unless this is a run of file tokens, which we represent with no mapping). + void advance() { + const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; + SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); + FileID File = SM.getFileID(Expansion); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + if (Tok.location().isFileID()) { + // A run of file tokens continues while the expanded/spelled tokens match. + while (NextSpelled < SpelledTokens.size() && + NextExpanded < Result.ExpandedTokens.size() && + SpelledTokens[NextSpelled].location() == + Result.ExpandedTokens[NextExpanded].location()) { + ++NextSpelled; + ++NextExpanded; + } + // We need no mapping for file tokens copied to the expanded stream. + } else { + // We found a new macro expansion. We should have its spelling bounds. + auto End = CollectedExpansions.lookup(Expansion.getRawEncoding()); + assert(End.isValid() && "Macro expansion wasn't captured?"); + + // Mapping starts here... + TokenBuffer::Mapping Mapping; + Mapping.BeginExpanded = NextExpanded; + Mapping.BeginSpelled = NextSpelled; + // ... consumes spelled tokens within bounds we captured ... + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= End) + ++NextSpelled; + // ... consumes expanded tokens rooted at the same expansion ... + while (NextExpanded < Result.ExpandedTokens.size() && + SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()) == Expansion) + ++NextExpanded; + // ... and ends here. + Mapping.EndExpanded = NextExpanded; + Mapping.EndSpelled = NextSpelled; + Result.Files[File].Mappings.push_back(Mapping); } } - /// Skipped expanded and spelled tokens of a macro expansion that covers \p - /// SpelledRange. Add a corresponding mapping. - /// (!) \p I will be the index of the last token in an expansion after this - /// function returns. - void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) { - auto FID = SM.getFileID(SpelledRange.getBegin()); - assert(FID == SM.getFileID(SpelledRange.getEnd())); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, SpelledRange.getBegin(), I); - - // Skip all expanded tokens from the same macro expansion. 
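A worked example of what discard() and advance() above produce for a small file; the token indices and mapping values are a hypothetical illustration rather than output from a real run.

// Input (single file):
//   #define ID(x) x
//   ID(1) + 2;
// Spelled tokens:  # define ID ( x ) x  ID ( 1 )  + 2 ;   (indices 0..13)
// Expanded tokens: 1 + 2 ;                                (indices 0..3, then eof)
// discard() emits an empty mapping for the directive, which expands to nothing:
//   {BeginSpelled = 0, EndSpelled = 7, BeginExpanded = 0, EndExpanded = 0}
// advance() emits a mapping for the macro call ID(1):
//   {BeginSpelled = 7, EndSpelled = 11, BeginExpanded = 0, EndExpanded = 1}
// The trailing file tokens `+ 2 ;` match the expanded stream one-to-one and
// need no mapping at all.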
- unsigned BeginExpanded = I; - for (; I + 1 < Result.ExpandedTokens.size(); ++I) { - auto NextL = Result.ExpandedTokens[I + 1].location(); - if (!NextL.isMacroID() || - SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) - break; + // advance() is supposed to consume at least one token - if not, we crash. + void diagnoseAdvanceFailure() { +#ifndef NDEBUG + // Show the failed-to-map token in context. + for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; + I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { + const char *L = + (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; + llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; } - unsigned EndExpanded = I + 1; - consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, - EndExpanded, NextSpelled[FID]); +#endif + llvm_unreachable("Couldn't map expanded token to spelled tokens!"); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded /// ranges for each of the files. void buildSpelledTokens() { for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { - auto FID = - SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location())); + const auto &Tok = Result.ExpandedTokens[I]; + auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); auto It = Result.Files.try_emplace(FID); TokenBuffer::MarkedFile &File = It.first->second; - File.EndExpanded = I + 1; + // The eof token should not be considered part of the main-file's range. + File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; + if (!It.second) continue; // we have seen this file before. - // This is the first time we see this file. File.BeginExpanded = I; File.SpelledTokens = tokenize(FID, SM, LangOpts); } } - void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned ExpandedIndex, unsigned &SpelledIndex) { - consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); - } - - /// Consumes spelled tokens that form a macro expansion and adds a entry to - /// the resulting token buffer. - /// (!) SpelledIndex is updated in-place. - void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned BeginExpanded, unsigned EndExpanded, - unsigned &SpelledIndex) { - // We need to record this mapping before continuing. - unsigned MappingBegin = SpelledIndex; - ++SpelledIndex; - - bool HitMapping = - tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); - (void)HitMapping; - assert(!HitMapping && "recursive macro expansion?"); - - TokenBuffer::Mapping M; - M.BeginExpanded = BeginExpanded; - M.EndExpanded = EndExpanded; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - - File.Mappings.push_back(M); - } - - /// Consumes spelled tokens until location \p L is reached and adds a mapping - /// covering the consumed tokens. The mapping will point to an empty expanded - /// range at position \p ExpandedIndex. 
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, - unsigned ExpandedIndex) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); - - unsigned &SpelledIndex = NextSpelled[FID]; - unsigned MappingBegin = SpelledIndex; - while (true) { - auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); - if (SpelledIndex != MappingBegin) { - TokenBuffer::Mapping M; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); - } - if (!EndLoc) - break; - consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, - SpelledIndex); - - MappingBegin = SpelledIndex; - } - }; - - /// Consumes spelled tokens until it reaches Offset or a mapping boundary, - /// i.e. a name of a macro expansion or the start '#' token of a PP directive. - /// (!) NextSpelled is updated in place. - /// - /// returns None if \p Offset was reached, otherwise returns the end location - /// of a mapping that starts at \p NextSpelled. - llvm::Optional<SourceLocation> - tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, - unsigned &NextSpelled) { - for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { - auto L = File.SpelledTokens[NextSpelled].location(); - if (Offset <= SM.getFileOffset(L)) - return llvm::None; // reached the offset we are looking for. - auto Mapping = CollectedExpansions.find(L.getRawEncoding()); - if (Mapping != CollectedExpansions.end()) - return Mapping->second; // found a mapping before the offset. - } - return llvm::None; // no more tokens, we "reached" the offset. - } - - /// Adds empty mappings for unconsumed spelled tokens at the end of each file. - void fillGapsAtEndOfFiles() { - for (auto &F : Result.Files) { - if (F.second.SpelledTokens.empty()) - continue; - fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), - F.second.EndExpanded); - } - } - TokenBuffer Result; - /// For each file, a position of the next spelled token we will consume. 
- llvm::DenseMap<FileID, unsigned> NextSpelled; + unsigned NextExpanded = 0; // cursor in ExpandedTokens + llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens PPExpansions CollectedExpansions; const SourceManager &SM; const LangOptions &LangOpts; @@ -604,19 +802,20 @@ TokenBuffer TokenCollector::consume() && { } std::string syntax::Token::str() const { - return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()), - length()); + return std::string(llvm::formatv("Token({0}, length = {1})", + tok::getTokenName(kind()), length())); } std::string syntax::Token::dumpForTests(const SourceManager &SM) const { - return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM)); + return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM), + tok::getTokenName(kind()), length())); } std::string TokenBuffer::dumpForTests() const { auto PrintToken = [this](const syntax::Token &T) -> std::string { if (T.kind() == tok::eof) return "<eof>"; - return T.text(*SourceMgr); + return std::string(T.text(*SourceMgr)); }; auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS, diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 9a6270ec4cce3..37579e6145b65 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -58,22 +58,33 @@ bool syntax::Leaf::classof(const Node *N) { syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)), - Role(static_cast<unsigned>(NodeRole::Detached)), Original(false), - CanModify(false) {} + Role(0), Original(false), CanModify(false) { + this->setRole(NodeRole::Detached); +} bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } +void syntax::Node::setRole(NodeRole NR) { + this->Role = static_cast<unsigned>(NR); +} + bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { - assert(Child->Parent == nullptr); - assert(Child->NextSibling == nullptr); assert(Child->role() == NodeRole::Detached); assert(Role != NodeRole::Detached); + Child->setRole(Role); + prependChildLowLevel(Child); +} + +void syntax::Tree::prependChildLowLevel(Node *Child) { + assert(Child->Parent == nullptr); + assert(Child->NextSibling == nullptr); + assert(Child->role() != NodeRole::Detached); + Child->Parent = this; Child->NextSibling = this->FirstChild; - Child->Role = static_cast<unsigned>(Role); this->FirstChild = Child; } @@ -94,7 +105,7 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, N != End;) { auto *Next = N->NextSibling; - N->Role = static_cast<unsigned>(NodeRole::Detached); + N->setRole(NodeRole::Detached); N->Parent = nullptr; N->NextSibling = nullptr; if (N->Original) |