diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp')
| -rw-r--r-- | contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp | 1752 | 
1 files changed, 1752 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp b/contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp new file mode 100644 index 000000000000..fcac2250dd96 --- /dev/null +++ b/contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -0,0 +1,1752 @@ +//===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/ASTFwd.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/IgnoreExpr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/TypeLoc.h" +#include "clang/AST/TypeLocVisitor.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <cstddef> +#include <map> + +using namespace clang; + +// Ignores the implicit `CXXConstructExpr` for copy/move constructor calls +// generated by the compiler, as well as in implicit conversions like the one +// wrapping `1` in `X x = 1;`. +static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { +  if (auto *C = dyn_cast<CXXConstructExpr>(E)) { +    auto NumArgs = C->getNumArgs(); +    if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(C->getArg(1)))) { +      Expr *A = C->getArg(0); +      if (C->getParenOrBraceRange().isInvalid()) +        return A; +    } +  } +  return E; +} + +// In: +// struct X { +//   X(int) +// }; +// X x = X(1); +// Ignores the implicit `CXXFunctionalCastExpr` that wraps +// `CXXConstructExpr X(1)`. +static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) { +  if (auto *F = dyn_cast<CXXFunctionalCastExpr>(E)) { +    if (F->getCastKind() == CK_ConstructorConversion) +      return F->getSubExpr(); +  } +  return E; +} + +static Expr *IgnoreImplicit(Expr *E) { +  return IgnoreExprNodes(E, IgnoreImplicitSingleStep, +                         IgnoreImplicitConstructorSingleStep, +                         IgnoreCXXFunctionalCastExprWrappingConstructor); +} + +LLVM_ATTRIBUTE_UNUSED +static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; } + +namespace { +/// Get start location of the Declarator from the TypeLoc. +/// E.g.: +///   loc of `(` in `int (a)` +///   loc of `*` in `int *(a)` +///   loc of the first `(` in `int (*a)(int)` +///   loc of the `*` in `int *(a)(int)` +///   loc of the first `*` in `const int *const *volatile a;` +/// +/// It is non-trivial to get the start location because TypeLocs are stored +/// inside out. In the example above `*volatile` is the TypeLoc returned +/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` +/// returns. +struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { +  SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { +    auto L = Visit(T.getInnerLoc()); +    if (L.isValid()) +      return L; +    return T.getLParenLoc(); +  } + +  // Types spelled in the prefix part of the declarator. +  SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { +    return HandlePointer(T); +  } + +  SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { +    return HandlePointer(T); +  } + +  SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { +    return HandlePointer(T); +  } + +  SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { +    return HandlePointer(T); +  } + +  SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { +    return HandlePointer(T); +  } + +  // All other cases are not important, as they are either part of declaration +  // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on +  // existing declarators (e.g. QualifiedTypeLoc). They cannot start the +  // declarator themselves, but their underlying type can. +  SourceLocation VisitTypeLoc(TypeLoc T) { +    auto N = T.getNextTypeLoc(); +    if (!N) +      return SourceLocation(); +    return Visit(N); +  } + +  SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { +    if (T.getTypePtr()->hasTrailingReturn()) +      return SourceLocation(); // avoid recursing into the suffix of declarator. +    return VisitTypeLoc(T); +  } + +private: +  template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { +    auto L = Visit(T.getPointeeLoc()); +    if (L.isValid()) +      return L; +    return T.getLocalSourceRange().getBegin(); +  } +}; +} // namespace + +static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) { +  auto FirstDefaultArg = +      llvm::find_if(Args, [](auto It) { return isa<CXXDefaultArgExpr>(It); }); +  return llvm::make_range(Args.begin(), FirstDefaultArg); +} + +static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { +  switch (E.getOperator()) { +  // Comparison +  case OO_EqualEqual: +  case OO_ExclaimEqual: +  case OO_Greater: +  case OO_GreaterEqual: +  case OO_Less: +  case OO_LessEqual: +  case OO_Spaceship: +  // Assignment +  case OO_Equal: +  case OO_SlashEqual: +  case OO_PercentEqual: +  case OO_CaretEqual: +  case OO_PipeEqual: +  case OO_LessLessEqual: +  case OO_GreaterGreaterEqual: +  case OO_PlusEqual: +  case OO_MinusEqual: +  case OO_StarEqual: +  case OO_AmpEqual: +  // Binary computation +  case OO_Slash: +  case OO_Percent: +  case OO_Caret: +  case OO_Pipe: +  case OO_LessLess: +  case OO_GreaterGreater: +  case OO_AmpAmp: +  case OO_PipePipe: +  case OO_ArrowStar: +  case OO_Comma: +    return syntax::NodeKind::BinaryOperatorExpression; +  case OO_Tilde: +  case OO_Exclaim: +    return syntax::NodeKind::PrefixUnaryOperatorExpression; +  // Prefix/Postfix increment/decrement +  case OO_PlusPlus: +  case OO_MinusMinus: +    switch (E.getNumArgs()) { +    case 1: +      return syntax::NodeKind::PrefixUnaryOperatorExpression; +    case 2: +      return syntax::NodeKind::PostfixUnaryOperatorExpression; +    default: +      llvm_unreachable("Invalid number of arguments for operator"); +    } +  // Operators that can be unary or binary +  case OO_Plus: +  case OO_Minus: +  case OO_Star: +  case OO_Amp: +    switch (E.getNumArgs()) { +    case 1: +      return syntax::NodeKind::PrefixUnaryOperatorExpression; +    case 2: +      return syntax::NodeKind::BinaryOperatorExpression; +    default: +      llvm_unreachable("Invalid number of arguments for operator"); +    } +    return syntax::NodeKind::BinaryOperatorExpression; +  // Not yet supported by SyntaxTree +  case OO_New: +  case OO_Delete: +  case OO_Array_New: +  case OO_Array_Delete: +  case OO_Coawait: +  case OO_Subscript: +  case OO_Arrow: +    return syntax::NodeKind::UnknownExpression; +  case OO_Call: +    return syntax::NodeKind::CallExpression; +  case OO_Conditional: // not overloadable +  case NUM_OVERLOADED_OPERATORS: +  case OO_None: +    llvm_unreachable("Not an overloadable operator"); +  } +  llvm_unreachable("Unknown OverloadedOperatorKind enum"); +} + +/// Get the start of the qualified name. In the examples below it gives the +/// location of the `^`: +///     `int ^a;` +///     `int *^a;` +///     `int ^a::S::f(){}` +static SourceLocation getQualifiedNameStart(NamedDecl *D) { +  assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && +         "only DeclaratorDecl and TypedefNameDecl are supported."); + +  auto DN = D->getDeclName(); +  bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); +  if (IsAnonymous) +    return SourceLocation(); + +  if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { +    if (DD->getQualifierLoc()) { +      return DD->getQualifierLoc().getBeginLoc(); +    } +  } + +  return D->getLocation(); +} + +/// Gets the range of the initializer inside an init-declarator C++ [dcl.decl]. +///     `int a;` -> range of ``, +///     `int *a = nullptr` -> range of `= nullptr`. +///     `int a{}` -> range of `{}`. +///     `int a()` -> range of `()`. +static SourceRange getInitializerRange(Decl *D) { +  if (auto *V = dyn_cast<VarDecl>(D)) { +    auto *I = V->getInit(); +    // Initializers in range-based-for are not part of the declarator +    if (I && !V->isCXXForRangeDecl()) +      return I->getSourceRange(); +  } + +  return SourceRange(); +} + +/// Gets the range of declarator as defined by the C++ grammar. E.g. +///     `int a;` -> range of `a`, +///     `int *a;` -> range of `*a`, +///     `int a[10];` -> range of `a[10]`, +///     `int a[1][2][3];` -> range of `a[1][2][3]`, +///     `int *a = nullptr` -> range of `*a = nullptr`. +///     `int S::f(){}` -> range of `S::f()`. +/// FIXME: \p Name must be a source range. +static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, +                                      SourceLocation Name, +                                      SourceRange Initializer) { +  SourceLocation Start = GetStartLoc().Visit(T); +  SourceLocation End = T.getEndLoc(); +  if (Name.isValid()) { +    if (Start.isInvalid()) +      Start = Name; +    // End of TypeLoc could be invalid if the type is invalid, fallback to the +    // NameLoc. +    if (End.isInvalid() || SM.isBeforeInTranslationUnit(End, Name)) +      End = Name; +  } +  if (Initializer.isValid()) { +    auto InitializerEnd = Initializer.getEnd(); +    assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || +           End == InitializerEnd); +    End = InitializerEnd; +  } +  return SourceRange(Start, End); +} + +namespace { +/// All AST hierarchy roots that can be represented as pointers. +using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; +/// Maintains a mapping from AST to syntax tree nodes. This class will get more +/// complicated as we support more kinds of AST nodes, e.g. TypeLocs. +/// FIXME: expose this as public API. +class ASTToSyntaxMapping { +public: +  void add(ASTPtr From, syntax::Tree *To) { +    assert(To != nullptr); +    assert(!From.isNull()); + +    bool Added = Nodes.insert({From, To}).second; +    (void)Added; +    assert(Added && "mapping added twice"); +  } + +  void add(NestedNameSpecifierLoc From, syntax::Tree *To) { +    assert(To != nullptr); +    assert(From.hasQualifier()); + +    bool Added = NNSNodes.insert({From, To}).second; +    (void)Added; +    assert(Added && "mapping added twice"); +  } + +  syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } + +  syntax::Tree *find(NestedNameSpecifierLoc P) const { +    return NNSNodes.lookup(P); +  } + +private: +  llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; +  llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; +}; +} // namespace + +/// A helper class for constructing the syntax tree while traversing a clang +/// AST. +/// +/// At each point of the traversal we maintain a list of pending nodes. +/// Initially all tokens are added as pending nodes. When processing a clang AST +/// node, the clients need to: +///   - create a corresponding syntax node, +///   - assign roles to all pending child nodes with 'markChild' and +///     'markChildToken', +///   - replace the child nodes with the new syntax node in the pending list +///     with 'foldNode'. +/// +/// Note that all children are expected to be processed when building a node. +/// +/// Call finalize() to finish building the tree and consume the root node. +class syntax::TreeBuilder { +public: +  TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { +    for (const auto &T : Arena.getTokenBuffer().expandedTokens()) +      LocationToToken.insert({T.location(), &T}); +  } + +  llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } +  const SourceManager &sourceManager() const { +    return Arena.getSourceManager(); +  } + +  /// Populate children for \p New node, assuming it covers tokens from \p +  /// Range. +  void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { +    assert(New); +    Pending.foldChildren(Arena, Range, New); +    if (From) +      Mapping.add(From, New); +  } + +  void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { +    // FIXME: add mapping for TypeLocs +    foldNode(Range, New, nullptr); +  } + +  void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, +                NestedNameSpecifierLoc From) { +    assert(New); +    Pending.foldChildren(Arena, Range, New); +    if (From) +      Mapping.add(From, New); +  } + +  /// Populate children for \p New list, assuming it covers tokens from a +  /// subrange of \p SuperRange. +  void foldList(ArrayRef<syntax::Token> SuperRange, syntax::List *New, +                ASTPtr From) { +    assert(New); +    auto ListRange = Pending.shrinkToFitList(SuperRange); +    Pending.foldChildren(Arena, ListRange, New); +    if (From) +      Mapping.add(From, New); +  } + +  /// Notifies that we should not consume trailing semicolon when computing +  /// token range of \p D. +  void noticeDeclWithoutSemicolon(Decl *D); + +  /// Mark the \p Child node with a corresponding \p Role. All marked children +  /// should be consumed by foldNode. +  /// When called on expressions (clang::Expr is derived from clang::Stmt), +  /// wraps expressions into expression statement. +  void markStmtChild(Stmt *Child, NodeRole Role); +  /// Should be called for expressions in non-statement position to avoid +  /// wrapping into expression statement. +  void markExprChild(Expr *Child, NodeRole Role); +  /// Set role for a token starting at \p Loc. +  void markChildToken(SourceLocation Loc, NodeRole R); +  /// Set role for \p T. +  void markChildToken(const syntax::Token *T, NodeRole R); + +  /// Set role for \p N. +  void markChild(syntax::Node *N, NodeRole R); +  /// Set role for the syntax node matching \p N. +  void markChild(ASTPtr N, NodeRole R); +  /// Set role for the syntax node matching \p N. +  void markChild(NestedNameSpecifierLoc N, NodeRole R); + +  /// Finish building the tree and consume the root node. +  syntax::TranslationUnit *finalize() && { +    auto Tokens = Arena.getTokenBuffer().expandedTokens(); +    assert(!Tokens.empty()); +    assert(Tokens.back().kind() == tok::eof); + +    // Build the root of the tree, consuming all the children. +    Pending.foldChildren(Arena, Tokens.drop_back(), +                         new (Arena.getAllocator()) syntax::TranslationUnit); + +    auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); +    TU->assertInvariantsRecursive(); +    return TU; +  } + +  /// Finds a token starting at \p L. The token must exist if \p L is valid. +  const syntax::Token *findToken(SourceLocation L) const; + +  /// Finds the syntax tokens corresponding to the \p SourceRange. +  ArrayRef<syntax::Token> getRange(SourceRange Range) const { +    assert(Range.isValid()); +    return getRange(Range.getBegin(), Range.getEnd()); +  } + +  /// Finds the syntax tokens corresponding to the passed source locations. +  /// \p First is the start position of the first token and \p Last is the start +  /// position of the last token. +  ArrayRef<syntax::Token> getRange(SourceLocation First, +                                   SourceLocation Last) const { +    assert(First.isValid()); +    assert(Last.isValid()); +    assert(First == Last || +           Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); +    return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); +  } + +  ArrayRef<syntax::Token> +  getTemplateRange(const ClassTemplateSpecializationDecl *D) const { +    auto Tokens = getRange(D->getSourceRange()); +    return maybeAppendSemicolon(Tokens, D); +  } + +  /// Returns true if \p D is the last declarator in a chain and is thus +  /// reponsible for creating SimpleDeclaration for the whole chain. +  bool isResponsibleForCreatingDeclaration(const Decl *D) const { +    assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && +           "only DeclaratorDecl and TypedefNameDecl are supported."); + +    const Decl *Next = D->getNextDeclInContext(); + +    // There's no next sibling, this one is responsible. +    if (Next == nullptr) { +      return true; +    } + +    // Next sibling is not the same type, this one is responsible. +    if (D->getKind() != Next->getKind()) { +      return true; +    } +    // Next sibling doesn't begin at the same loc, it must be a different +    // declaration, so this declarator is responsible. +    if (Next->getBeginLoc() != D->getBeginLoc()) { +      return true; +    } + +    // NextT is a member of the same declaration, and we need the last member to +    // create declaration. This one is not responsible. +    return false; +  } + +  ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { +    ArrayRef<syntax::Token> Tokens; +    // We want to drop the template parameters for specializations. +    if (const auto *S = dyn_cast<TagDecl>(D)) +      Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); +    else +      Tokens = getRange(D->getSourceRange()); +    return maybeAppendSemicolon(Tokens, D); +  } + +  ArrayRef<syntax::Token> getExprRange(const Expr *E) const { +    return getRange(E->getSourceRange()); +  } + +  /// Find the adjusted range for the statement, consuming the trailing +  /// semicolon when needed. +  ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { +    auto Tokens = getRange(S->getSourceRange()); +    if (isa<CompoundStmt>(S)) +      return Tokens; + +    // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and +    // all statements that end with those. Consume this semicolon here. +    if (Tokens.back().kind() == tok::semi) +      return Tokens; +    return withTrailingSemicolon(Tokens); +  } + +private: +  ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, +                                               const Decl *D) const { +    if (isa<NamespaceDecl>(D)) +      return Tokens; +    if (DeclsWithoutSemicolons.count(D)) +      return Tokens; +    // FIXME: do not consume trailing semicolon on function definitions. +    // Most declarations own a semicolon in syntax trees, but not in clang AST. +    return withTrailingSemicolon(Tokens); +  } + +  ArrayRef<syntax::Token> +  withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { +    assert(!Tokens.empty()); +    assert(Tokens.back().kind() != tok::eof); +    // We never consume 'eof', so looking at the next token is ok. +    if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) +      return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); +    return Tokens; +  } + +  void setRole(syntax::Node *N, NodeRole R) { +    assert(N->getRole() == NodeRole::Detached); +    N->setRole(R); +  } + +  /// A collection of trees covering the input tokens. +  /// When created, each tree corresponds to a single token in the file. +  /// Clients call 'foldChildren' to attach one or more subtrees to a parent +  /// node and update the list of trees accordingly. +  /// +  /// Ensures that added nodes properly nest and cover the whole token stream. +  struct Forest { +    Forest(syntax::Arena &A) { +      assert(!A.getTokenBuffer().expandedTokens().empty()); +      assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); +      // Create all leaf nodes. +      // Note that we do not have 'eof' in the tree. +      for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { +        auto *L = new (A.getAllocator()) syntax::Leaf(&T); +        L->Original = true; +        L->CanModify = A.getTokenBuffer().spelledForExpanded(T).hasValue(); +        Trees.insert(Trees.end(), {&T, L}); +      } +    } + +    void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { +      assert(!Range.empty()); +      auto It = Trees.lower_bound(Range.begin()); +      assert(It != Trees.end() && "no node found"); +      assert(It->first == Range.begin() && "no child with the specified range"); +      assert((std::next(It) == Trees.end() || +              std::next(It)->first == Range.end()) && +             "no child with the specified range"); +      assert(It->second->getRole() == NodeRole::Detached && +             "re-assigning role for a child"); +      It->second->setRole(Role); +    } + +    /// Shrink \p Range to a subrange that only contains tokens of a list. +    /// List elements and delimiters should already have correct roles. +    ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) { +      auto BeginChildren = Trees.lower_bound(Range.begin()); +      assert((BeginChildren == Trees.end() || +              BeginChildren->first == Range.begin()) && +             "Range crosses boundaries of existing subtrees"); + +      auto EndChildren = Trees.lower_bound(Range.end()); +      assert( +          (EndChildren == Trees.end() || EndChildren->first == Range.end()) && +          "Range crosses boundaries of existing subtrees"); + +      auto BelongsToList = [](decltype(Trees)::value_type KV) { +        auto Role = KV.second->getRole(); +        return Role == syntax::NodeRole::ListElement || +               Role == syntax::NodeRole::ListDelimiter; +      }; + +      auto BeginListChildren = +          std::find_if(BeginChildren, EndChildren, BelongsToList); + +      auto EndListChildren = +          std::find_if_not(BeginListChildren, EndChildren, BelongsToList); + +      return ArrayRef<syntax::Token>(BeginListChildren->first, +                                     EndListChildren->first); +    } + +    /// Add \p Node to the forest and attach child nodes based on \p Tokens. +    void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, +                      syntax::Tree *Node) { +      // Attach children to `Node`. +      assert(Node->getFirstChild() == nullptr && "node already has children"); + +      auto *FirstToken = Tokens.begin(); +      auto BeginChildren = Trees.lower_bound(FirstToken); + +      assert((BeginChildren == Trees.end() || +              BeginChildren->first == FirstToken) && +             "fold crosses boundaries of existing subtrees"); +      auto EndChildren = Trees.lower_bound(Tokens.end()); +      assert( +          (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && +          "fold crosses boundaries of existing subtrees"); + +      for (auto It = BeginChildren; It != EndChildren; ++It) { +        auto *C = It->second; +        if (C->getRole() == NodeRole::Detached) +          C->setRole(NodeRole::Unknown); +        Node->appendChildLowLevel(C); +      } + +      // Mark that this node came from the AST and is backed by the source code. +      Node->Original = true; +      Node->CanModify = +          A.getTokenBuffer().spelledForExpanded(Tokens).hasValue(); + +      Trees.erase(BeginChildren, EndChildren); +      Trees.insert({FirstToken, Node}); +    } + +    // EXPECTS: all tokens were consumed and are owned by a single root node. +    syntax::Node *finalize() && { +      assert(Trees.size() == 1); +      auto *Root = Trees.begin()->second; +      Trees = {}; +      return Root; +    } + +    std::string str(const syntax::Arena &A) const { +      std::string R; +      for (auto It = Trees.begin(); It != Trees.end(); ++It) { +        unsigned CoveredTokens = +            It != Trees.end() +                ? (std::next(It)->first - It->first) +                : A.getTokenBuffer().expandedTokens().end() - It->first; + +        R += std::string( +            formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), +                    It->first->text(A.getSourceManager()), CoveredTokens)); +        R += It->second->dump(A.getSourceManager()); +      } +      return R; +    } + +  private: +    /// Maps from the start token to a subtree starting at that token. +    /// Keys in the map are pointers into the array of expanded tokens, so +    /// pointer order corresponds to the order of preprocessor tokens. +    std::map<const syntax::Token *, syntax::Node *> Trees; +  }; + +  /// For debugging purposes. +  std::string str() { return Pending.str(Arena); } + +  syntax::Arena &Arena; +  /// To quickly find tokens by their start location. +  llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; +  Forest Pending; +  llvm::DenseSet<Decl *> DeclsWithoutSemicolons; +  ASTToSyntaxMapping Mapping; +}; + +namespace { +class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { +public: +  explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) +      : Builder(Builder), Context(Context) {} + +  bool shouldTraversePostOrder() const { return true; } + +  bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { +    return processDeclaratorAndDeclaration(DD); +  } + +  bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { +    return processDeclaratorAndDeclaration(TD); +  } + +  bool VisitDecl(Decl *D) { +    assert(!D->isImplicit()); +    Builder.foldNode(Builder.getDeclarationRange(D), +                     new (allocator()) syntax::UnknownDeclaration(), D); +    return true; +  } + +  // RAV does not call WalkUpFrom* on explicit instantiations, so we have to +  // override Traverse. +  // FIXME: make RAV call WalkUpFrom* instead. +  bool +  TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { +    if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) +      return false; +    if (C->isExplicitSpecialization()) +      return true; // we are only interested in explicit instantiations. +    auto *Declaration = +        cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); +    foldExplicitTemplateInstantiation( +        Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), +        Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); +    return true; +  } + +  bool WalkUpFromTemplateDecl(TemplateDecl *S) { +    foldTemplateDeclaration( +        Builder.getDeclarationRange(S), +        Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), +        Builder.getDeclarationRange(S->getTemplatedDecl()), S); +    return true; +  } + +  bool WalkUpFromTagDecl(TagDecl *C) { +    // FIXME: build the ClassSpecifier node. +    if (!C->isFreeStanding()) { +      assert(C->getNumTemplateParameterLists() == 0); +      return true; +    } +    handleFreeStandingTagDecl(C); +    return true; +  } + +  syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { +    assert(C->isFreeStanding()); +    // Class is a declaration specifier and needs a spanning declaration node. +    auto DeclarationRange = Builder.getDeclarationRange(C); +    syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; +    Builder.foldNode(DeclarationRange, Result, nullptr); + +    // Build TemplateDeclaration nodes if we had template parameters. +    auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { +      const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); +      auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); +      Result = +          foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); +      DeclarationRange = R; +    }; +    if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) +      ConsumeTemplateParameters(*S->getTemplateParameters()); +    for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) +      ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); +    return Result; +  } + +  bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { +    // We do not want to call VisitDecl(), the declaration for translation +    // unit is built by finalize(). +    return true; +  } + +  bool WalkUpFromCompoundStmt(CompoundStmt *S) { +    using NodeRole = syntax::NodeRole; + +    Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); +    for (auto *Child : S->body()) +      Builder.markStmtChild(Child, NodeRole::Statement); +    Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); + +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::CompoundStatement, S); +    return true; +  } + +  // Some statements are not yet handled by syntax trees. +  bool WalkUpFromStmt(Stmt *S) { +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::UnknownStatement, S); +    return true; +  } + +  bool TraverseIfStmt(IfStmt *S) { +    bool Result = [&, this]() { +      if (S->getInit() && !TraverseStmt(S->getInit())) { +        return false; +      } +      // In cases where the condition is an initialized declaration in a +      // statement, we want to preserve the declaration and ignore the +      // implicit condition expression in the syntax tree. +      if (S->hasVarStorage()) { +        if (!TraverseStmt(S->getConditionVariableDeclStmt())) +          return false; +      } else if (S->getCond() && !TraverseStmt(S->getCond())) +        return false; + +      if (S->getThen() && !TraverseStmt(S->getThen())) +        return false; +      if (S->getElse() && !TraverseStmt(S->getElse())) +        return false; +      return true; +    }(); +    WalkUpFromIfStmt(S); +    return Result; +  } + +  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { +    // We override to traverse range initializer as VarDecl. +    // RAV traverses it as a statement, we produce invalid node kinds in that +    // case. +    // FIXME: should do this in RAV instead? +    bool Result = [&, this]() { +      if (S->getInit() && !TraverseStmt(S->getInit())) +        return false; +      if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) +        return false; +      if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) +        return false; +      if (S->getBody() && !TraverseStmt(S->getBody())) +        return false; +      return true; +    }(); +    WalkUpFromCXXForRangeStmt(S); +    return Result; +  } + +  bool TraverseStmt(Stmt *S) { +    if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { +      // We want to consume the semicolon, make sure SimpleDeclaration does not. +      for (auto *D : DS->decls()) +        Builder.noticeDeclWithoutSemicolon(D); +    } else if (auto *E = dyn_cast_or_null<Expr>(S)) { +      return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E)); +    } +    return RecursiveASTVisitor::TraverseStmt(S); +  } + +  bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) { +    // OpaqueValue doesn't correspond to concrete syntax, ignore it. +    return true; +  } + +  // Some expressions are not yet handled by syntax trees. +  bool WalkUpFromExpr(Expr *E) { +    assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); +    Builder.foldNode(Builder.getExprRange(E), +                     new (allocator()) syntax::UnknownExpression, E); +    return true; +  } + +  bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { +    // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node +    // referencing the location of the UDL suffix (`_w` in `1.2_w`). The +    // UDL suffix location does not point to the beginning of a token, so we +    // can't represent the UDL suffix as a separate syntax tree node. + +    return WalkUpFromUserDefinedLiteral(S); +  } + +  syntax::UserDefinedLiteralExpression * +  buildUserDefinedLiteral(UserDefinedLiteral *S) { +    switch (S->getLiteralOperatorKind()) { +    case UserDefinedLiteral::LOK_Integer: +      return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; +    case UserDefinedLiteral::LOK_Floating: +      return new (allocator()) syntax::FloatUserDefinedLiteralExpression; +    case UserDefinedLiteral::LOK_Character: +      return new (allocator()) syntax::CharUserDefinedLiteralExpression; +    case UserDefinedLiteral::LOK_String: +      return new (allocator()) syntax::StringUserDefinedLiteralExpression; +    case UserDefinedLiteral::LOK_Raw: +    case UserDefinedLiteral::LOK_Template: +      // For raw literal operator and numeric literal operator template we +      // cannot get the type of the operand in the semantic AST. We get this +      // information from the token. As integer and floating point have the same +      // token kind, we run `NumericLiteralParser` again to distinguish them. +      auto TokLoc = S->getBeginLoc(); +      auto TokSpelling = +          Builder.findToken(TokLoc)->text(Context.getSourceManager()); +      auto Literal = +          NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), +                               Context.getLangOpts(), Context.getTargetInfo(), +                               Context.getDiagnostics()); +      if (Literal.isIntegerLiteral()) +        return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; +      else { +        assert(Literal.isFloatingLiteral()); +        return new (allocator()) syntax::FloatUserDefinedLiteralExpression; +      } +    } +    llvm_unreachable("Unknown literal operator kind."); +  } + +  bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { +    Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); +    return true; +  } + +  // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the +  // `DependentTemplateSpecializationType` case. +  /// Given a nested-name-specifier return the range for the last name +  /// specifier. +  /// +  /// e.g. `std::T::template X<U>::` => `template X<U>::` +  SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { +    auto SR = NNSLoc.getLocalSourceRange(); + +    // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* +    // return the desired `SourceRange`, but there is a corner case. For a +    // `DependentTemplateSpecializationType` this method returns its +    // qualifiers as well, in other words in the example above this method +    // returns `T::template X<U>::` instead of only `template X<U>::` +    if (auto TL = NNSLoc.getTypeLoc()) { +      if (auto DependentTL = +              TL.getAs<DependentTemplateSpecializationTypeLoc>()) { +        // The 'template' keyword is always present in dependent template +        // specializations. Except in the case of incorrect code +        // TODO: Treat the case of incorrect code. +        SR.setBegin(DependentTL.getTemplateKeywordLoc()); +      } +    } + +    return SR; +  } + +  syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { +    switch (NNS.getKind()) { +    case NestedNameSpecifier::Global: +      return syntax::NodeKind::GlobalNameSpecifier; +    case NestedNameSpecifier::Namespace: +    case NestedNameSpecifier::NamespaceAlias: +    case NestedNameSpecifier::Identifier: +      return syntax::NodeKind::IdentifierNameSpecifier; +    case NestedNameSpecifier::TypeSpecWithTemplate: +      return syntax::NodeKind::SimpleTemplateNameSpecifier; +    case NestedNameSpecifier::TypeSpec: { +      const auto *NNSType = NNS.getAsType(); +      assert(NNSType); +      if (isa<DecltypeType>(NNSType)) +        return syntax::NodeKind::DecltypeNameSpecifier; +      if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( +              NNSType)) +        return syntax::NodeKind::SimpleTemplateNameSpecifier; +      return syntax::NodeKind::IdentifierNameSpecifier; +    } +    default: +      // FIXME: Support Microsoft's __super +      llvm::report_fatal_error("We don't yet support the __super specifier", +                               true); +    } +  } + +  syntax::NameSpecifier * +  buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { +    assert(NNSLoc.hasQualifier()); +    auto NameSpecifierTokens = +        Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back(); +    switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) { +    case syntax::NodeKind::GlobalNameSpecifier: +      return new (allocator()) syntax::GlobalNameSpecifier; +    case syntax::NodeKind::IdentifierNameSpecifier: { +      assert(NameSpecifierTokens.size() == 1); +      Builder.markChildToken(NameSpecifierTokens.begin(), +                             syntax::NodeRole::Unknown); +      auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; +      Builder.foldNode(NameSpecifierTokens, NS, nullptr); +      return NS; +    } +    case syntax::NodeKind::SimpleTemplateNameSpecifier: { +      // TODO: Build `SimpleTemplateNameSpecifier` children and implement +      // accessors to them. +      // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, +      // some `TypeLoc`s have inside them the previous name specifier and +      // we want to treat them independently. +      auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; +      Builder.foldNode(NameSpecifierTokens, NS, nullptr); +      return NS; +    } +    case syntax::NodeKind::DecltypeNameSpecifier: { +      const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); +      if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) +        return nullptr; +      auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; +      // TODO: Implement accessor to `DecltypeNameSpecifier` inner +      // `DecltypeTypeLoc`. +      // For that add mapping from `TypeLoc` to `syntax::Node*` then: +      // Builder.markChild(TypeLoc, syntax::NodeRole); +      Builder.foldNode(NameSpecifierTokens, NS, nullptr); +      return NS; +    } +    default: +      llvm_unreachable("getChildKind() does not return this value"); +    } +  } + +  // To build syntax tree nodes for NestedNameSpecifierLoc we override +  // Traverse instead of WalkUpFrom because we want to traverse the children +  // ourselves and build a list instead of a nested tree of name specifier +  // prefixes. +  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { +    if (!QualifierLoc) +      return true; +    for (auto It = QualifierLoc; It; It = It.getPrefix()) { +      auto *NS = buildNameSpecifier(It); +      if (!NS) +        return false; +      Builder.markChild(NS, syntax::NodeRole::ListElement); +      Builder.markChildToken(It.getEndLoc(), syntax::NodeRole::ListDelimiter); +    } +    Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), +                     new (allocator()) syntax::NestedNameSpecifier, +                     QualifierLoc); +    return true; +  } + +  syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, +                                          SourceLocation TemplateKeywordLoc, +                                          SourceRange UnqualifiedIdLoc, +                                          ASTPtr From) { +    if (QualifierLoc) { +      Builder.markChild(QualifierLoc, syntax::NodeRole::Qualifier); +      if (TemplateKeywordLoc.isValid()) +        Builder.markChildToken(TemplateKeywordLoc, +                               syntax::NodeRole::TemplateKeyword); +    } + +    auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; +    Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId, +                     nullptr); +    Builder.markChild(TheUnqualifiedId, syntax::NodeRole::UnqualifiedId); + +    auto IdExpressionBeginLoc = +        QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); + +    auto *TheIdExpression = new (allocator()) syntax::IdExpression; +    Builder.foldNode( +        Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()), +        TheIdExpression, From); + +    return TheIdExpression; +  } + +  bool WalkUpFromMemberExpr(MemberExpr *S) { +    // For `MemberExpr` with implicit `this->` we generate a simple +    // `id-expression` syntax node, beacuse an implicit `member-expression` is +    // syntactically undistinguishable from an `id-expression` +    if (S->isImplicitAccess()) { +      buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), +                        SourceRange(S->getMemberLoc(), S->getEndLoc()), S); +      return true; +    } + +    auto *TheIdExpression = buildIdExpression( +        S->getQualifierLoc(), S->getTemplateKeywordLoc(), +        SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); + +    Builder.markChild(TheIdExpression, syntax::NodeRole::Member); + +    Builder.markExprChild(S->getBase(), syntax::NodeRole::Object); +    Builder.markChildToken(S->getOperatorLoc(), syntax::NodeRole::AccessToken); + +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::MemberExpression, S); +    return true; +  } + +  bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { +    buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), +                      SourceRange(S->getLocation(), S->getEndLoc()), S); + +    return true; +  } + +  // Same logic as DeclRefExpr. +  bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { +    buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), +                      SourceRange(S->getLocation(), S->getEndLoc()), S); + +    return true; +  } + +  bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { +    if (!S->isImplicit()) { +      Builder.markChildToken(S->getLocation(), +                             syntax::NodeRole::IntroducerKeyword); +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::ThisExpression, S); +    } +    return true; +  } + +  bool WalkUpFromParenExpr(ParenExpr *S) { +    Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); +    Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::SubExpression); +    Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::ParenExpression, S); +    return true; +  } + +  bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { +    Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::IntegerLiteralExpression, S); +    return true; +  } + +  bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { +    Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::CharacterLiteralExpression, S); +    return true; +  } + +  bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { +    Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::FloatingLiteralExpression, S); +    return true; +  } + +  bool WalkUpFromStringLiteral(StringLiteral *S) { +    Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::StringLiteralExpression, S); +    return true; +  } + +  bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { +    Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::BoolLiteralExpression, S); +    return true; +  } + +  bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { +    Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::CxxNullPtrExpression, S); +    return true; +  } + +  bool WalkUpFromUnaryOperator(UnaryOperator *S) { +    Builder.markChildToken(S->getOperatorLoc(), +                           syntax::NodeRole::OperatorToken); +    Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::Operand); + +    if (S->isPostfix()) +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::PostfixUnaryOperatorExpression, +                       S); +    else +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::PrefixUnaryOperatorExpression, +                       S); + +    return true; +  } + +  bool WalkUpFromBinaryOperator(BinaryOperator *S) { +    Builder.markExprChild(S->getLHS(), syntax::NodeRole::LeftHandSide); +    Builder.markChildToken(S->getOperatorLoc(), +                           syntax::NodeRole::OperatorToken); +    Builder.markExprChild(S->getRHS(), syntax::NodeRole::RightHandSide); +    Builder.foldNode(Builder.getExprRange(S), +                     new (allocator()) syntax::BinaryOperatorExpression, S); +    return true; +  } + +  /// Builds `CallArguments` syntax node from arguments that appear in source +  /// code, i.e. not default arguments. +  syntax::CallArguments * +  buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) { +    auto Args = dropDefaultArgs(ArgsAndDefaultArgs); +    for (auto *Arg : Args) { +      Builder.markExprChild(Arg, syntax::NodeRole::ListElement); +      const auto *DelimiterToken = +          std::next(Builder.findToken(Arg->getEndLoc())); +      if (DelimiterToken->kind() == clang::tok::TokenKind::comma) +        Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); +    } + +    auto *Arguments = new (allocator()) syntax::CallArguments; +    if (!Args.empty()) +      Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(), +                                        (*(Args.end() - 1))->getEndLoc()), +                       Arguments, nullptr); + +    return Arguments; +  } + +  bool WalkUpFromCallExpr(CallExpr *S) { +    Builder.markExprChild(S->getCallee(), syntax::NodeRole::Callee); + +    const auto *LParenToken = +        std::next(Builder.findToken(S->getCallee()->getEndLoc())); +    // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed +    // the test on decltype desctructors. +    if (LParenToken->kind() == clang::tok::l_paren) +      Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); + +    Builder.markChild(buildCallArguments(S->arguments()), +                      syntax::NodeRole::Arguments); + +    Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); + +    Builder.foldNode(Builder.getRange(S->getSourceRange()), +                     new (allocator()) syntax::CallExpression, S); +    return true; +  } + +  bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) { +    // Ignore the implicit calls to default constructors. +    if ((S->getNumArgs() == 0 || isa<CXXDefaultArgExpr>(S->getArg(0))) && +        S->getParenOrBraceRange().isInvalid()) +      return true; +    return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); +  } + +  bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { +    // To construct a syntax tree of the same shape for calls to built-in and +    // user-defined operators, ignore the `DeclRefExpr` that refers to the +    // operator and treat it as a simple token. Do that by traversing +    // arguments instead of children. +    for (auto *child : S->arguments()) { +      // A postfix unary operator is declared as taking two operands. The +      // second operand is used to distinguish from its prefix counterpart. In +      // the semantic AST this "phantom" operand is represented as a +      // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this +      // operand because it does not correspond to anything written in source +      // code. +      if (child->getSourceRange().isInvalid()) { +        assert(getOperatorNodeKind(*S) == +               syntax::NodeKind::PostfixUnaryOperatorExpression); +        continue; +      } +      if (!TraverseStmt(child)) +        return false; +    } +    return WalkUpFromCXXOperatorCallExpr(S); +  } + +  bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { +    switch (getOperatorNodeKind(*S)) { +    case syntax::NodeKind::BinaryOperatorExpression: +      Builder.markExprChild(S->getArg(0), syntax::NodeRole::LeftHandSide); +      Builder.markChildToken(S->getOperatorLoc(), +                             syntax::NodeRole::OperatorToken); +      Builder.markExprChild(S->getArg(1), syntax::NodeRole::RightHandSide); +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::BinaryOperatorExpression, S); +      return true; +    case syntax::NodeKind::PrefixUnaryOperatorExpression: +      Builder.markChildToken(S->getOperatorLoc(), +                             syntax::NodeRole::OperatorToken); +      Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::PrefixUnaryOperatorExpression, +                       S); +      return true; +    case syntax::NodeKind::PostfixUnaryOperatorExpression: +      Builder.markChildToken(S->getOperatorLoc(), +                             syntax::NodeRole::OperatorToken); +      Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); +      Builder.foldNode(Builder.getExprRange(S), +                       new (allocator()) syntax::PostfixUnaryOperatorExpression, +                       S); +      return true; +    case syntax::NodeKind::CallExpression: { +      Builder.markExprChild(S->getArg(0), syntax::NodeRole::Callee); + +      const auto *LParenToken = +          std::next(Builder.findToken(S->getArg(0)->getEndLoc())); +      // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have +      // fixed the test on decltype desctructors. +      if (LParenToken->kind() == clang::tok::l_paren) +        Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); + +      Builder.markChild(buildCallArguments(CallExpr::arg_range( +                            S->arg_begin() + 1, S->arg_end())), +                        syntax::NodeRole::Arguments); + +      Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); + +      Builder.foldNode(Builder.getRange(S->getSourceRange()), +                       new (allocator()) syntax::CallExpression, S); +      return true; +    } +    case syntax::NodeKind::UnknownExpression: +      return WalkUpFromExpr(S); +    default: +      llvm_unreachable("getOperatorNodeKind() does not return this value"); +    } +  } + +  bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } + +  bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { +    auto Tokens = Builder.getDeclarationRange(S); +    if (Tokens.front().kind() == tok::coloncolon) { +      // Handle nested namespace definitions. Those start at '::' token, e.g. +      // namespace a^::b {} +      // FIXME: build corresponding nodes for the name of this namespace. +      return true; +    } +    Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); +    return true; +  } + +  // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test +  // results. Find test coverage or remove it. +  bool TraverseParenTypeLoc(ParenTypeLoc L) { +    // We reverse order of traversal to get the proper syntax structure. +    if (!WalkUpFromParenTypeLoc(L)) +      return false; +    return TraverseTypeLoc(L.getInnerLoc()); +  } + +  bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { +    Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); +    Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); +    Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), +                     new (allocator()) syntax::ParenDeclarator, L); +    return true; +  } + +  // Declarator chunks, they are produced by type locs and some clang::Decls. +  bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { +    Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); +    Builder.markExprChild(L.getSizeExpr(), syntax::NodeRole::Size); +    Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); +    Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), +                     new (allocator()) syntax::ArraySubscript, L); +    return true; +  } + +  syntax::ParameterDeclarationList * +  buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) { +    for (auto *P : Params) { +      Builder.markChild(P, syntax::NodeRole::ListElement); +      const auto *DelimiterToken = std::next(Builder.findToken(P->getEndLoc())); +      if (DelimiterToken->kind() == clang::tok::TokenKind::comma) +        Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); +    } +    auto *Parameters = new (allocator()) syntax::ParameterDeclarationList; +    if (!Params.empty()) +      Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(), +                                        Params.back()->getEndLoc()), +                       Parameters, nullptr); +    return Parameters; +  } + +  bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { +    Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + +    Builder.markChild(buildParameterDeclarationList(L.getParams()), +                      syntax::NodeRole::Parameters); + +    Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); +    Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), +                     new (allocator()) syntax::ParametersAndQualifiers, L); +    return true; +  } + +  bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { +    if (!L.getTypePtr()->hasTrailingReturn()) +      return WalkUpFromFunctionTypeLoc(L); + +    auto *TrailingReturnTokens = buildTrailingReturn(L); +    // Finish building the node for parameters. +    Builder.markChild(TrailingReturnTokens, syntax::NodeRole::TrailingReturn); +    return WalkUpFromFunctionTypeLoc(L); +  } + +  bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { +    // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds +    // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to +    // "(Y::*mp)" We thus reverse the order of traversal to get the proper +    // syntax structure. +    if (!WalkUpFromMemberPointerTypeLoc(L)) +      return false; +    return TraverseTypeLoc(L.getPointeeLoc()); +  } + +  bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { +    auto SR = L.getLocalSourceRange(); +    Builder.foldNode(Builder.getRange(SR), +                     new (allocator()) syntax::MemberPointer, L); +    return true; +  } + +  // The code below is very regular, it could even be generated with some +  // preprocessor magic. We merely assign roles to the corresponding children +  // and fold resulting nodes. +  bool WalkUpFromDeclStmt(DeclStmt *S) { +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::DeclarationStatement, S); +    return true; +  } + +  bool WalkUpFromNullStmt(NullStmt *S) { +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::EmptyStatement, S); +    return true; +  } + +  bool WalkUpFromSwitchStmt(SwitchStmt *S) { +    Builder.markChildToken(S->getSwitchLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::SwitchStatement, S); +    return true; +  } + +  bool WalkUpFromCaseStmt(CaseStmt *S) { +    Builder.markChildToken(S->getKeywordLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseValue); +    Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::CaseStatement, S); +    return true; +  } + +  bool WalkUpFromDefaultStmt(DefaultStmt *S) { +    Builder.markChildToken(S->getKeywordLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::DefaultStatement, S); +    return true; +  } + +  bool WalkUpFromIfStmt(IfStmt *S) { +    Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); +    Stmt *ConditionStatement = S->getCond(); +    if (S->hasVarStorage()) +      ConditionStatement = S->getConditionVariableDeclStmt(); +    Builder.markStmtChild(ConditionStatement, syntax::NodeRole::Condition); +    Builder.markStmtChild(S->getThen(), syntax::NodeRole::ThenStatement); +    Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::ElseKeyword); +    Builder.markStmtChild(S->getElse(), syntax::NodeRole::ElseStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::IfStatement, S); +    return true; +  } + +  bool WalkUpFromForStmt(ForStmt *S) { +    Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); +    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::ForStatement, S); +    return true; +  } + +  bool WalkUpFromWhileStmt(WhileStmt *S) { +    Builder.markChildToken(S->getWhileLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::WhileStatement, S); +    return true; +  } + +  bool WalkUpFromContinueStmt(ContinueStmt *S) { +    Builder.markChildToken(S->getContinueLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::ContinueStatement, S); +    return true; +  } + +  bool WalkUpFromBreakStmt(BreakStmt *S) { +    Builder.markChildToken(S->getBreakLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::BreakStatement, S); +    return true; +  } + +  bool WalkUpFromReturnStmt(ReturnStmt *S) { +    Builder.markChildToken(S->getReturnLoc(), +                           syntax::NodeRole::IntroducerKeyword); +    Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnValue); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::ReturnStatement, S); +    return true; +  } + +  bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { +    Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); +    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); +    Builder.foldNode(Builder.getStmtRange(S), +                     new (allocator()) syntax::RangeBasedForStatement, S); +    return true; +  } + +  bool WalkUpFromEmptyDecl(EmptyDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::EmptyDeclaration, S); +    return true; +  } + +  bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { +    Builder.markExprChild(S->getAssertExpr(), syntax::NodeRole::Condition); +    Builder.markExprChild(S->getMessage(), syntax::NodeRole::Message); +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::StaticAssertDeclaration, S); +    return true; +  } + +  bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::LinkageSpecificationDeclaration, +                     S); +    return true; +  } + +  bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::NamespaceAliasDefinition, S); +    return true; +  } + +  bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::UsingNamespaceDirective, S); +    return true; +  } + +  bool WalkUpFromUsingDecl(UsingDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::UsingDeclaration, S); +    return true; +  } + +  bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::UsingDeclaration, S); +    return true; +  } + +  bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::UsingDeclaration, S); +    return true; +  } + +  bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { +    Builder.foldNode(Builder.getDeclarationRange(S), +                     new (allocator()) syntax::TypeAliasDeclaration, S); +    return true; +  } + +private: +  /// Folds SimpleDeclarator node (if present) and in case this is the last +  /// declarator in the chain it also folds SimpleDeclaration node. +  template <class T> bool processDeclaratorAndDeclaration(T *D) { +    auto Range = getDeclaratorRange( +        Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), +        getQualifiedNameStart(D), getInitializerRange(D)); + +    // There doesn't have to be a declarator (e.g. `void foo(int)` only has +    // declaration, but no declarator). +    if (!Range.getBegin().isValid()) { +      Builder.markChild(new (allocator()) syntax::DeclaratorList, +                        syntax::NodeRole::Declarators); +      Builder.foldNode(Builder.getDeclarationRange(D), +                       new (allocator()) syntax::SimpleDeclaration, D); +      return true; +    } + +    auto *N = new (allocator()) syntax::SimpleDeclarator; +    Builder.foldNode(Builder.getRange(Range), N, nullptr); +    Builder.markChild(N, syntax::NodeRole::ListElement); + +    if (!Builder.isResponsibleForCreatingDeclaration(D)) { +      // If this is not the last declarator in the declaration we expect a +      // delimiter after it. +      const auto *DelimiterToken = std::next(Builder.findToken(Range.getEnd())); +      if (DelimiterToken->kind() == clang::tok::TokenKind::comma) +        Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); +    } else { +      auto *DL = new (allocator()) syntax::DeclaratorList; +      auto DeclarationRange = Builder.getDeclarationRange(D); +      Builder.foldList(DeclarationRange, DL, nullptr); + +      Builder.markChild(DL, syntax::NodeRole::Declarators); +      Builder.foldNode(DeclarationRange, +                       new (allocator()) syntax::SimpleDeclaration, D); +    } +    return true; +  } + +  /// Returns the range of the built node. +  syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) { +    assert(L.getTypePtr()->hasTrailingReturn()); + +    auto ReturnedType = L.getReturnLoc(); +    // Build node for the declarator, if any. +    auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType), +                                             ReturnedType.getEndLoc()); +    syntax::SimpleDeclarator *ReturnDeclarator = nullptr; +    if (ReturnDeclaratorRange.isValid()) { +      ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; +      Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), +                       ReturnDeclarator, nullptr); +    } + +    // Build node for trailing return type. +    auto Return = Builder.getRange(ReturnedType.getSourceRange()); +    const auto *Arrow = Return.begin() - 1; +    assert(Arrow->kind() == tok::arrow); +    auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); +    Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); +    if (ReturnDeclarator) +      Builder.markChild(ReturnDeclarator, syntax::NodeRole::Declarator); +    auto *R = new (allocator()) syntax::TrailingReturnType; +    Builder.foldNode(Tokens, R, L); +    return R; +  } + +  void foldExplicitTemplateInstantiation( +      ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, +      const syntax::Token *TemplateKW, +      syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { +    assert(!ExternKW || ExternKW->kind() == tok::kw_extern); +    assert(TemplateKW && TemplateKW->kind() == tok::kw_template); +    Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); +    Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); +    Builder.markChild(InnerDeclaration, syntax::NodeRole::Declaration); +    Builder.foldNode( +        Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); +  } + +  syntax::TemplateDeclaration *foldTemplateDeclaration( +      ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, +      ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { +    assert(TemplateKW && TemplateKW->kind() == tok::kw_template); +    Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + +    auto *N = new (allocator()) syntax::TemplateDeclaration; +    Builder.foldNode(Range, N, From); +    Builder.markChild(N, syntax::NodeRole::Declaration); +    return N; +  } + +  /// A small helper to save some typing. +  llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } + +  syntax::TreeBuilder &Builder; +  const ASTContext &Context; +}; +} // namespace + +void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { +  DeclsWithoutSemicolons.insert(D); +} + +void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { +  if (Loc.isInvalid()) +    return; +  Pending.assignRole(*findToken(Loc), Role); +} + +void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { +  if (!T) +    return; +  Pending.assignRole(*T, R); +} + +void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { +  assert(N); +  setRole(N, R); +} + +void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { +  auto *SN = Mapping.find(N); +  assert(SN != nullptr); +  setRole(SN, R); +} +void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { +  auto *SN = Mapping.find(NNSLoc); +  assert(SN != nullptr); +  setRole(SN, R); +} + +void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { +  if (!Child) +    return; + +  syntax::Tree *ChildNode; +  if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { +    // This is an expression in a statement position, consume the trailing +    // semicolon and form an 'ExpressionStatement' node. +    markExprChild(ChildExpr, NodeRole::Expression); +    ChildNode = new (allocator()) syntax::ExpressionStatement; +    // (!) 'getStmtRange()' ensures this covers a trailing semicolon. +    Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); +  } else { +    ChildNode = Mapping.find(Child); +  } +  assert(ChildNode != nullptr); +  setRole(ChildNode, Role); +} + +void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { +  if (!Child) +    return; +  Child = IgnoreImplicit(Child); + +  syntax::Tree *ChildNode = Mapping.find(Child); +  assert(ChildNode != nullptr); +  setRole(ChildNode, Role); +} + +const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { +  if (L.isInvalid()) +    return nullptr; +  auto It = LocationToToken.find(L); +  assert(It != LocationToToken.end()); +  return It->second; +} + +syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A, +                                                 ASTContext &Context) { +  TreeBuilder Builder(A); +  BuildTreeVisitor(Context, Builder).TraverseAST(Context); +  return std::move(Builder).finalize(); +}  | 
