Diffstat (limited to 'clang/lib/Tooling/Syntax')
-rw-r--r-- | clang/lib/Tooling/Syntax/BuildTree.cpp | 1108
-rw-r--r-- | clang/lib/Tooling/Syntax/Mutations.cpp | 2
-rw-r--r-- | clang/lib/Tooling/Syntax/Nodes.cpp | 281
-rw-r--r-- | clang/lib/Tooling/Syntax/Tokens.cpp | 609
-rw-r--r-- | clang/lib/Tooling/Syntax/Tree.cpp | 23
5 files changed, 1598 insertions, 425 deletions
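
The patch below extends clang's experimental syntax-tree library: BuildTree.cpp grows handling for declarators, literals, operators and templates, and Nodes.cpp adds typed accessors for the new node kinds. For orientation only, a minimal consumer of this API might look like the sketch below. It assumes the buildSyntaxTree() entry point declared in clang/Tooling/Syntax/BuildTree.h at the time of this change and an already-populated syntax::Arena; the helper name dumpMainFileSyntaxTree is made up for illustration and is not part of the patch.

// Illustrative sketch, not part of this change. Exact signatures may differ
// between revisions of the library.
#include "clang/AST/ASTContext.h"
#include "clang/Tooling/Syntax/BuildTree.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/Support/raw_ostream.h"

void dumpMainFileSyntaxTree(clang::ASTContext &Ctx, clang::syntax::Arena &Arena) {
  // Build the syntax tree for the whole translation unit and print it.
  clang::syntax::TranslationUnit *Root =
      clang::syntax::buildSyntaxTree(Arena, *Ctx.getTranslationUnitDecl());
  llvm::errs() << Root->dump(Arena);
  // The typed accessors added in Nodes.cpp (e.g.
  // syntax::BinaryOperatorExpression::lhs() / rhs()) can then be used to
  // inspect individual constructs in the tree.
}
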
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index aa8844771d373..1f192180ec451 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,20 +6,32 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/ASTFwd.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include "clang/AST/TypeLoc.h" +#include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -27,6 +39,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <cstddef> #include <map> using namespace clang; @@ -34,6 +47,207 @@ using namespace clang; LLVM_ATTRIBUTE_UNUSED static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } +namespace { +/// Get start location of the Declarator from the TypeLoc. +/// E.g.: +/// loc of `(` in `int (a)` +/// loc of `*` in `int *(a)` +/// loc of the first `(` in `int (*a)(int)` +/// loc of the `*` in `int *(a)(int)` +/// loc of the first `*` in `const int *const *volatile a;` +/// +/// It is non-trivial to get the start location because TypeLocs are stored +/// inside out. In the example above `*volatile` is the TypeLoc returned +/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` +/// returns. +struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { + SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { + auto L = Visit(T.getInnerLoc()); + if (L.isValid()) + return L; + return T.getLParenLoc(); + } + + // Types spelled in the prefix part of the declarator. + SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { + return HandlePointer(T); + } + + // All other cases are not important, as they are either part of declaration + // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on + // existing declarators (e.g. QualifiedTypeLoc). They cannot start the + // declarator themselves, but their underlying type can. 
+ SourceLocation VisitTypeLoc(TypeLoc T) { + auto N = T.getNextTypeLoc(); + if (!N) + return SourceLocation(); + return Visit(N); + } + + SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { + if (T.getTypePtr()->hasTrailingReturn()) + return SourceLocation(); // avoid recursing into the suffix of declarator. + return VisitTypeLoc(T); + } + +private: + template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { + auto L = Visit(T.getPointeeLoc()); + if (L.isValid()) + return L; + return T.getLocalSourceRange().getBegin(); + } +}; +} // namespace + +static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { + switch (E.getOperator()) { + // Comparison + case OO_EqualEqual: + case OO_ExclaimEqual: + case OO_Greater: + case OO_GreaterEqual: + case OO_Less: + case OO_LessEqual: + case OO_Spaceship: + // Assignment + case OO_Equal: + case OO_SlashEqual: + case OO_PercentEqual: + case OO_CaretEqual: + case OO_PipeEqual: + case OO_LessLessEqual: + case OO_GreaterGreaterEqual: + case OO_PlusEqual: + case OO_MinusEqual: + case OO_StarEqual: + case OO_AmpEqual: + // Binary computation + case OO_Slash: + case OO_Percent: + case OO_Caret: + case OO_Pipe: + case OO_LessLess: + case OO_GreaterGreater: + case OO_AmpAmp: + case OO_PipePipe: + case OO_ArrowStar: + case OO_Comma: + return syntax::NodeKind::BinaryOperatorExpression; + case OO_Tilde: + case OO_Exclaim: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + // Prefix/Postfix increment/decrement + case OO_PlusPlus: + case OO_MinusMinus: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::PostfixUnaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + // Operators that can be unary or binary + case OO_Plus: + case OO_Minus: + case OO_Star: + case OO_Amp: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::BinaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + return syntax::NodeKind::BinaryOperatorExpression; + // Not yet supported by SyntaxTree + case OO_New: + case OO_Delete: + case OO_Array_New: + case OO_Array_Delete: + case OO_Coawait: + case OO_Call: + case OO_Subscript: + case OO_Arrow: + return syntax::NodeKind::UnknownExpression; + case OO_Conditional: // not overloadable + case NUM_OVERLOADED_OPERATORS: + case OO_None: + llvm_unreachable("Not an overloadable operator"); + } + llvm_unreachable("Unknown OverloadedOperatorKind enum"); +} + +/// Gets the range of declarator as defined by the C++ grammar. E.g. +/// `int a;` -> range of `a`, +/// `int *a;` -> range of `*a`, +/// `int a[10];` -> range of `a[10]`, +/// `int a[1][2][3];` -> range of `a[1][2][3]`, +/// `int *a = nullptr` -> range of `*a = nullptr`. +/// FIMXE: \p Name must be a source range, e.g. for `operator+`. 
+static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, + SourceLocation Name, + SourceRange Initializer) { + SourceLocation Start = GetStartLoc().Visit(T); + SourceLocation End = T.getSourceRange().getEnd(); + assert(End.isValid()); + if (Name.isValid()) { + if (Start.isInvalid()) + Start = Name; + if (SM.isBeforeInTranslationUnit(End, Name)) + End = Name; + } + if (Initializer.isValid()) { + auto InitializerEnd = Initializer.getEnd(); + assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || + End == InitializerEnd); + End = InitializerEnd; + } + return SourceRange(Start, End); +} + +namespace { +/// All AST hierarchy roots that can be represented as pointers. +using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; +/// Maintains a mapping from AST to syntax tree nodes. This class will get more +/// complicated as we support more kinds of AST nodes, e.g. TypeLocs. +/// FIXME: expose this as public API. +class ASTToSyntaxMapping { +public: + void add(ASTPtr From, syntax::Tree *To) { + assert(To != nullptr); + assert(!From.isNull()); + + bool Added = Nodes.insert({From, To}).second; + (void)Added; + assert(Added && "mapping added twice"); + } + + syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } + +private: + llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; +}; +} // namespace + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -57,30 +271,44 @@ public: } llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } + const SourceManager &sourceManager() const { return Arena.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. - void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); - - /// Must be called with the range of each `DeclaratorDecl`. Ensures the - /// corresponding declarator nodes are covered by `SimpleDeclaration`. - void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + ASTPtr From) { + assert(New); + Pending.foldChildren(Arena, Range, New); + if (From) + Mapping.add(From, New); + } + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + TypeLoc L) { + // FIXME: add mapping for TypeLocs + foldNode(Range, New, nullptr); + } /// Notifies that we should not consume trailing semicolon when computing /// token range of \p D. - void noticeDeclaratorWithoutSemicolon(Decl *D); + void noticeDeclWithoutSemicolon(Decl *D); /// Mark the \p Child node with a corresponding \p Role. All marked children /// should be consumed by foldNode. - /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), - /// wraps expressions into expression statement. + /// When called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. void markStmtChild(Stmt *Child, NodeRole Role); /// Should be called for expressions in non-statement position to avoid /// wrapping into expression statement. void markExprChild(Expr *Child, NodeRole Role); - /// Set role for a token starting at \p Loc. void markChildToken(SourceLocation Loc, NodeRole R); + /// Set role for \p T. + void markChildToken(const syntax::Token *T, NodeRole R); + + /// Set role for \p N. + void markChild(syntax::Node *N, NodeRole R); + /// Set role for the syntax node matching \p N. + void markChild(ASTPtr N, NodeRole R); /// Finish building the tree and consume the root node. 
syntax::TranslationUnit *finalize() && { @@ -97,8 +325,16 @@ public: return TU; } - /// getRange() finds the syntax tokens corresponding to the passed source - /// locations. + /// Finds a token starting at \p L. The token must exist if \p L is valid. + const syntax::Token *findToken(SourceLocation L) const; + + /// Finds the syntax tokens corresponding to the \p SourceRange. + llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { + assert(Range.isValid()); + return getRange(Range.getBegin(), Range.getEnd()); + } + + /// Finds the syntax tokens corresponding to the passed source locations. /// \p First is the start position of the first token and \p Last is the start /// position of the last token. llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, @@ -109,23 +345,62 @@ public: Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } - llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { - auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); - if (llvm::isa<NamespaceDecl>(D)) - return Tokens; - if (DeclsWithoutSemicolons.count(D)) - return Tokens; - // FIXME: do not consume trailing semicolon on function definitions. - // Most declarations own a semicolon in syntax trees, but not in clang AST. - return withTrailingSemicolon(Tokens); + + llvm::ArrayRef<syntax::Token> + getTemplateRange(const ClassTemplateSpecializationDecl *D) const { + auto Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); } + + /// Returns true if \p D is the last declarator in a chain and is thus + /// reponsible for creating SimpleDeclaration for the whole chain. + template <class T> + bool isResponsibleForCreatingDeclaration(const T *D) const { + static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + const Decl *Next = D->getNextDeclInContext(); + + // There's no next sibling, this one is responsible. + if (Next == nullptr) { + return true; + } + const auto *NextT = llvm::dyn_cast<T>(Next); + + // Next sibling is not the same type, this one is responsible. + if (NextT == nullptr) { + return true; + } + // Next sibling doesn't begin at the same loc, it must be a different + // declaration, so this declarator is responsible. + if (NextT->getBeginLoc() != D->getBeginLoc()) { + return true; + } + + // NextT is a member of the same declaration, and we need the last member to + // create declaration. This one is not responsible. + return false; + } + + llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { + llvm::ArrayRef<clang::syntax::Token> Tokens; + // We want to drop the template parameters for specializations. + if (const auto *S = llvm::dyn_cast<TagDecl>(D)) + Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); + else + Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); + } + llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { - return getRange(E->getBeginLoc(), E->getEndLoc()); + return getRange(E->getSourceRange()); } + /// Find the adjusted range for the statement, consuming the trailing /// semicolon when needed. 
llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { - auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + auto Tokens = getRange(S->getSourceRange()); if (isa<CompoundStmt>(S)) return Tokens; @@ -138,17 +413,31 @@ public: private: llvm::ArrayRef<syntax::Token> + maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, + const Decl *D) const { + if (llvm::isa<NamespaceDecl>(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. + return withTrailingSemicolon(Tokens); + } + + llvm::ArrayRef<syntax::Token> withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { assert(!Tokens.empty()); assert(Tokens.back().kind() != tok::eof); - // (!) we never consume 'eof', so looking at the next token is ok. + // We never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; } - /// Finds a token starting at \p L. The token must exist. - const syntax::Token *findToken(SourceLocation L) const; + void setRole(syntax::Node *N, NodeRole R) { + assert(N->role() == NodeRole::Detached); + N->setRole(R); + } /// A collection of trees covering the input tokens. /// When created, each tree corresponds to a single token in the file. @@ -166,12 +455,10 @@ private: auto *L = new (A.allocator()) syntax::Leaf(&T); L->Original = true; L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); - Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + Trees.insert(Trees.end(), {&T, L}); } } - ~Forest() { assert(DelayedFolds.empty()); } - void assignRole(llvm::ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -181,56 +468,49 @@ private: assert((std::next(It) == Trees.end() || std::next(It)->first == Range.end()) && "no child with the specified range"); - It->second.Role = Role; + assert(It->second->role() == NodeRole::Detached && + "re-assigning role for a child"); + It->second->setRole(Role); } /// Add \p Node to the forest and attach child nodes based on \p Tokens. void foldChildren(const syntax::Arena &A, llvm::ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { - // Execute delayed folds inside `Tokens`. - auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); - auto It = BeginExecuted; - for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) - foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), - It->second.Node); - DelayedFolds.erase(BeginExecuted, It); - // Attach children to `Node`. - foldChildrenEager(A, Tokens, Node); - } + assert(Node->firstChild() == nullptr && "node already has children"); - /// Schedule a call to `foldChildren` that will only be executed when - /// containing node is folded. The range of delayed nodes can be extended by - /// calling `extendDelayedFold`. Only one delayed node for each starting - /// token is allowed. 
- void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(!Tokens.empty()); - bool Inserted = - DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) - .second; - (void)Inserted; - assert(Inserted && "Multiple delayed folds start at the same token"); - } + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); - /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends - /// its endpoint to `ExtendedRange.end()` and returns true. - /// Otherwise, returns false. - bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { - assert(!ExtendedRange.empty()); - auto It = DelayedFolds.find(ExtendedRange.data()); - if (It == DelayedFolds.end()) - return false; - assert(It->second.End <= ExtendedRange.end()); - It->second.End = ExtendedRange.end(); - return true; + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // We need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) { + auto *C = std::prev(It)->second; + if (C->role() == NodeRole::Detached) + C->setRole(NodeRole::Unknown); + Node->prependChildLowLevel(C); + } + + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, Node}); } // EXPECTS: all tokens were consumed and are owned by a single root node. syntax::Node *finalize() && { assert(Trees.size() == 1); - auto *Root = Trees.begin()->second.Node; + auto *Root = Trees.begin()->second; Trees = {}; return Root; } @@ -243,66 +523,19 @@ private: ? (std::next(It)->first - It->first) : A.tokenBuffer().expandedTokens().end() - It->first; - R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", - It->second.Node->kind(), - It->first->text(A.sourceManager()), CoveredTokens); - R += It->second.Node->dump(A); + R += std::string(llvm::formatv( + "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), + It->first->text(A.sourceManager()), CoveredTokens)); + R += It->second->dump(A); } return R; } private: - /// Implementation detail of `foldChildren`, does acutal folding ignoring - /// delayed folds. - void foldChildrenEager(const syntax::Arena &A, - llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = Tokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert((BeginChildren == Trees.end() || - BeginChildren->first == FirstToken) && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(Tokens.end()); - assert( - (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && - "fold crosses boundaries of existing subtrees"); - - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); - - // Mark that this node came from the AST and is backed by the source code. 
- Node->Original = true; - Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); - - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); - } - /// A with a role that should be assigned to it when adding to a parent. - struct NodeAndRole { - explicit NodeAndRole(syntax::Node *Node) - : Node(Node), Role(NodeRole::Unknown) {} - - syntax::Node *Node; - NodeRole Role; - }; - /// Maps from the start token to a subtree starting at that token. /// Keys in the map are pointers into the array of expanded tokens, so /// pointer order corresponds to the order of preprocessor tokens. - /// FIXME: storing the end tokens is redundant. - /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. - std::map<const syntax::Token *, NodeAndRole> Trees; - - /// See documentation of `foldChildrenDelayed` for details. - struct DelayedFold { - const syntax::Token *End = nullptr; - syntax::Tree *Node = nullptr; - }; - std::map<const syntax::Token *, DelayedFold> DelayedFolds; + std::map<const syntax::Token *, syntax::Node *> Trees; }; /// For debugging purposes. @@ -314,49 +547,91 @@ private: LocationToToken; Forest Pending; llvm::DenseSet<Decl *> DeclsWithoutSemicolons; + ASTToSyntaxMapping Mapping; }; namespace { class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { public: - explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) - : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} + explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) + : Builder(Builder), Context(Context) {} bool shouldTraversePostOrder() const { return true; } - bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { - // Ensure declarators are covered by SimpleDeclaration. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. - return true; + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { + return processDeclaratorAndDeclaration(DD); } - bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { - // Also a declarator. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. - return true; + + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { + return processDeclaratorAndDeclaration(TD); } bool VisitDecl(Decl *D) { assert(!D->isImplicit()); - Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::UnknownDeclaration()); + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::UnknownDeclaration(), D); + return true; + } + + // RAV does not call WalkUpFrom* on explicit instantiations, so we have to + // override Traverse. + // FIXME: make RAV call WalkUpFrom* instead. + bool + TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { + if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) + return false; + if (C->isExplicitSpecialization()) + return true; // we are only interested in explicit instantiations. 
+ auto *Declaration = + cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); + foldExplicitTemplateInstantiation( + Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), + Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); + return true; + } + + bool WalkUpFromTemplateDecl(TemplateDecl *S) { + foldTemplateDeclaration( + Builder.getDeclarationRange(S), + Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), + Builder.getDeclarationRange(S->getTemplatedDecl()), S); return true; } bool WalkUpFromTagDecl(TagDecl *C) { // FIXME: build the ClassSpecifier node. - if (C->isFreeStanding()) { - // Class is a declaration specifier and needs a spanning declaration node. - Builder.foldNode(Builder.getRange(C), - new (allocator()) syntax::SimpleDeclaration); + if (!C->isFreeStanding()) { + assert(C->getNumTemplateParameterLists() == 0); return true; } + handleFreeStandingTagDecl(C); return true; } + syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { + assert(C->isFreeStanding()); + // Class is a declaration specifier and needs a spanning declaration node. + auto DeclarationRange = Builder.getDeclarationRange(C); + syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; + Builder.foldNode(DeclarationRange, Result, nullptr); + + // Build TemplateDeclaration nodes if we had template parameters. + auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { + const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); + auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); + Result = + foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); + DeclarationRange = R; + }; + if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) + ConsumeTemplateParameters(*S->getTemplateParameters()); + for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) + ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); + return Result; + } + bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { - // (!) we do not want to call VisitDecl(), the declaration for translation + // We do not want to call VisitDecl(), the declaration for translation // unit is built by finalize(). return true; } @@ -370,14 +645,14 @@ public: Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CompoundStatement); + new (allocator()) syntax::CompoundStatement, S); return true; } // Some statements are not yet handled by syntax trees. bool WalkUpFromStmt(Stmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::UnknownStatement); + new (allocator()) syntax::UnknownStatement, S); return true; } @@ -386,27 +661,28 @@ public: // RAV traverses it as a statement, we produce invalid node kinds in that // case. // FIXME: should do this in RAV instead? 
- if (S->getInit() && !TraverseStmt(S->getInit())) - return false; - if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) - return false; - if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) - return false; - if (S->getBody() && !TraverseStmt(S->getBody())) - return false; - return true; + bool Result = [&, this]() { + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + }(); + WalkUpFromCXXForRangeStmt(S); + return Result; } bool TraverseStmt(Stmt *S) { if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { // We want to consume the semicolon, make sure SimpleDeclaration does not. for (auto *D : DS->decls()) - Builder.noticeDeclaratorWithoutSemicolon(D); + Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { - // (!) do not recurse into subexpressions. - // we do not have syntax trees for expressions yet, so we only want to see - // the first top-level expression. - return WalkUpFromExpr(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -415,19 +691,306 @@ public: bool WalkUpFromExpr(Expr *E) { assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); Builder.foldNode(Builder.getExprRange(E), - new (allocator()) syntax::UnknownExpression); + new (allocator()) syntax::UnknownExpression, E); + return true; + } + + syntax::NestedNameSpecifier * + BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) { + if (!QualifierLoc) + return nullptr; + for (auto it = QualifierLoc; it; it = it.getPrefix()) { + auto *NS = new (allocator()) syntax::NameSpecifier; + Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr); + Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); + } + auto *NNS = new (allocator()) syntax::NestedNameSpecifier; + Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, + nullptr); + return NNS; + } + + bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { + // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node + // referencing the location of the UDL suffix (`_w` in `1.2_w`). The + // UDL suffix location does not point to the beginning of a token, so we + // can't represent the UDL suffix as a separate syntax tree node. + + return WalkUpFromUserDefinedLiteral(S); + } + + syntax::UserDefinedLiteralExpression * + buildUserDefinedLiteral(UserDefinedLiteral *S) { + switch (S->getLiteralOperatorKind()) { + case clang::UserDefinedLiteral::LOK_Integer: + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Floating: + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Character: + return new (allocator()) syntax::CharUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_String: + return new (allocator()) syntax::StringUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Raw: + case clang::UserDefinedLiteral::LOK_Template: + // For raw literal operator and numeric literal operator template we + // cannot get the type of the operand in the semantic AST. We get this + // information from the token. 
As integer and floating point have the same + // token kind, we run `NumericLiteralParser` again to distinguish them. + auto TokLoc = S->getBeginLoc(); + auto TokSpelling = + Builder.findToken(TokLoc)->text(Context.getSourceManager()); + auto Literal = + NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), + Context.getLangOpts(), Context.getTargetInfo(), + Context.getDiagnostics()); + if (Literal.isIntegerLiteral()) + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + else { + assert(Literal.isFloatingLiteral()); + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + } + } + llvm_unreachable("Unknown literal operator kind."); + } + + bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); + return true; + } + + bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { + if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) + Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); + + auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; + // Get `UnqualifiedId` from `DeclRefExpr`. + // FIXME: Extract this logic so that it can be used by `MemberExpr`, + // and other semantic constructs, now it is tied to `DeclRefExpr`. + if (!S->hasExplicitTemplateArgs()) { + Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()), + unqualifiedId, nullptr); + } else { + auto templateIdSourceRange = + SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc()); + Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId, + nullptr); + } + Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); + + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IdExpression, S); + return true; + } + + bool WalkUpFromParenExpr(ParenExpr *S) { + Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::ParenExpression_subExpression); + Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::ParenExpression, S); + return true; + } + + bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IntegerLiteralExpression, S); + return true; + } + + bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CharacterLiteralExpression, S); + return true; + } + + bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::FloatingLiteralExpression, S); + return true; + } + + bool WalkUpFromStringLiteral(StringLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::StringLiteralExpression, S); + return true; + } + + bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BoolLiteralExpression, S); + return true; + } + + bool 
WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CxxNullPtrExpression, S); + return true; + } + + bool WalkUpFromUnaryOperator(UnaryOperator *S) { + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::UnaryOperatorExpression_operand); + + if (S->isPostfix()) + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + else + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + + return true; + } + + bool WalkUpFromBinaryOperator(BinaryOperator *S) { + Builder.markExprChild( + S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); return true; } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + if (getOperatorNodeKind(*S) == + syntax::NodeKind::PostfixUnaryOperatorExpression) { + // A postfix unary operator is declared as taking two operands. The + // second operand is used to distinguish from its prefix counterpart. In + // the semantic AST this "phantom" operand is represented as a + // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this + // operand because it does not correspond to anything written in source + // code + for (auto *child : S->children()) { + if (child->getSourceRange().isInvalid()) + continue; + if (!TraverseStmt(child)) + return false; + } + return WalkUpFromCXXOperatorCallExpr(S); + } else + return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); + } + + bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + switch (getOperatorNodeKind(*S)) { + case syntax::NodeKind::BinaryOperatorExpression: + Builder.markExprChild( + S->getArg(0), + syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getArg(1), + syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); + return true; + case syntax::NodeKind::PrefixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::PostfixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::UnknownExpression: + return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); + default: + llvm_unreachable("getOperatorNodeKind() does not return this value"); + } + } + 
bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { - auto Tokens = Builder.getRange(S); + auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { // Handle nested namespace definitions. Those start at '::' token, e.g. // namespace a^::b {} // FIXME: build corresponding nodes for the name of this namespace. return true; } - Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); + Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); + return true; + } + + bool TraverseParenTypeLoc(ParenTypeLoc L) { + // We reverse order of traversal to get the proper syntax structure. + if (!WalkUpFromParenTypeLoc(L)) + return false; + return TraverseTypeLoc(L.getInnerLoc()); + } + + bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), + new (allocator()) syntax::ParenDeclarator, L); + return true; + } + + // Declarator chunks, they are produced by type locs and some clang::Decls. + bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { + Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); + Builder.markExprChild(L.getSizeExpr(), + syntax::NodeRole::ArraySubscript_sizeExpression); + Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), + new (allocator()) syntax::ArraySubscript, L); + return true; + } + + bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + for (auto *P : L.getParams()) { + Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); + } + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), + new (allocator()) syntax::ParametersAndQualifiers, L); + return true; + } + + bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { + if (!L.getTypePtr()->hasTrailingReturn()) + return WalkUpFromFunctionTypeLoc(L); + + auto *TrailingReturnTokens = BuildTrailingReturn(L); + // Finish building the node for parameters. + Builder.markChild(TrailingReturnTokens, + syntax::NodeRole::ParametersAndQualifiers_trailingReturn); + return WalkUpFromFunctionTypeLoc(L); + } + + bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { + auto SR = L.getLocalSourceRange(); + Builder.foldNode(Builder.getRange(SR), + new (allocator()) syntax::MemberPointer, L); return true; } @@ -436,13 +999,13 @@ public: // and fold resulting nodes. 
bool WalkUpFromDeclStmt(DeclStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DeclarationStatement); + new (allocator()) syntax::DeclarationStatement, S); return true; } bool WalkUpFromNullStmt(NullStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::EmptyStatement); + new (allocator()) syntax::EmptyStatement, S); return true; } @@ -451,7 +1014,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::SwitchStatement); + new (allocator()) syntax::SwitchStatement, S); return true; } @@ -461,7 +1024,7 @@ public: Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CaseStatement); + new (allocator()) syntax::CaseStatement, S); return true; } @@ -470,7 +1033,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DefaultStatement); + new (allocator()) syntax::DefaultStatement, S); return true; } @@ -483,7 +1046,7 @@ public: Builder.markStmtChild(S->getElse(), syntax::NodeRole::IfStatement_elseStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::IfStatement); + new (allocator()) syntax::IfStatement, S); return true; } @@ -491,7 +1054,7 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ForStatement); + new (allocator()) syntax::ForStatement, S); return true; } @@ -500,7 +1063,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::WhileStatement); + new (allocator()) syntax::WhileStatement, S); return true; } @@ -508,7 +1071,7 @@ public: Builder.markChildToken(S->getContinueLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ContinueStatement); + new (allocator()) syntax::ContinueStatement, S); return true; } @@ -516,7 +1079,7 @@ public: Builder.markChildToken(S->getBreakLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::BreakStatement); + new (allocator()) syntax::BreakStatement, S); return true; } @@ -526,7 +1089,7 @@ public: Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnStatement_value); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ReturnStatement); + new (allocator()) syntax::ReturnStatement, S); return true; } @@ -534,13 +1097,13 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::RangeBasedForStatement); + new (allocator()) syntax::RangeBasedForStatement, S); return true; } bool WalkUpFromEmptyDecl(EmptyDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::EmptyDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::EmptyDeclaration, S); return true; } @@ -549,76 
+1112,175 @@ public: syntax::NodeRole::StaticAssertDeclaration_condition); Builder.markExprChild(S->getMessage(), syntax::NodeRole::StaticAssertDeclaration_message); - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::StaticAssertDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::StaticAssertDeclaration, S); return true; } bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::LinkageSpecificationDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::LinkageSpecificationDeclaration, + S); return true; } bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::NamespaceAliasDefinition); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::NamespaceAliasDefinition, S); return true; } bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingNamespaceDirective); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingNamespaceDirective, S); return true; } bool WalkUpFromUsingDecl(UsingDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::TypeAliasDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::TypeAliasDeclaration, S); return true; } private: + template <class T> SourceLocation getQualifiedNameStart(T *D) { + static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + auto DN = D->getDeclName(); + bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); + if (IsAnonymous) + return SourceLocation(); + + if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { + if (DD->getQualifierLoc()) { + return DD->getQualifierLoc().getBeginLoc(); + } + } + + return D->getLocation(); + } + + SourceRange getInitializerRange(Decl *D) { + if (auto *V = llvm::dyn_cast<VarDecl>(D)) { + auto *I = V->getInit(); + // Initializers in range-based-for are not part of the declarator + if (I && !V->isCXXForRangeDecl()) + return I->getSourceRange(); + } + + return SourceRange(); + } + + /// Folds SimpleDeclarator node (if present) and in case this is the last + /// declarator in the chain it also folds SimpleDeclaration node. 
+ template <class T> bool processDeclaratorAndDeclaration(T *D) { + SourceRange Initializer = getInitializerRange(D); + auto Range = getDeclaratorRange(Builder.sourceManager(), + D->getTypeSourceInfo()->getTypeLoc(), + getQualifiedNameStart(D), Initializer); + + // There doesn't have to be a declarator (e.g. `void foo(int)` only has + // declaration, but no declarator). + if (Range.getBegin().isValid()) { + auto *N = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(Range), N, nullptr); + Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); + } + + if (Builder.isResponsibleForCreatingDeclaration(D)) { + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::SimpleDeclaration, D); + } + return true; + } + + /// Returns the range of the built node. + syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { + assert(L.getTypePtr()->hasTrailingReturn()); + + auto ReturnedType = L.getReturnLoc(); + // Build node for the declarator, if any. + auto ReturnDeclaratorRange = + getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, + /*Name=*/SourceLocation(), + /*Initializer=*/SourceLocation()); + syntax::SimpleDeclarator *ReturnDeclarator = nullptr; + if (ReturnDeclaratorRange.isValid()) { + ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), + ReturnDeclarator, nullptr); + } + + // Build node for trailing return type. + auto Return = Builder.getRange(ReturnedType.getSourceRange()); + const auto *Arrow = Return.begin() - 1; + assert(Arrow->kind() == tok::arrow); + auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); + Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); + if (ReturnDeclarator) + Builder.markChild(ReturnDeclarator, + syntax::NodeRole::TrailingReturnType_declarator); + auto *R = new (allocator()) syntax::TrailingReturnType; + Builder.foldNode(Tokens, R, L); + return R; + } + + void foldExplicitTemplateInstantiation( + ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, + const syntax::Token *TemplateKW, + syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { + assert(!ExternKW || ExternKW->kind() == tok::kw_extern); + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + Builder.markChild( + InnerDeclaration, + syntax::NodeRole::ExplicitTemplateInstantiation_declaration); + Builder.foldNode( + Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); + } + + syntax::TemplateDeclaration *foldTemplateDeclaration( + ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, + ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + + auto *N = new (allocator()) syntax::TemplateDeclaration; + Builder.foldNode(Range, N, From); + Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); + return N; + } + /// A small helper to save some typing. 
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } syntax::TreeBuilder &Builder; - const LangOptions &LangOpts; + const ASTContext &Context; }; } // namespace -void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, - syntax::Tree *New) { - Pending.foldChildren(Arena, Range, New); -} - -void syntax::TreeBuilder::noticeDeclaratorRange( - llvm::ArrayRef<syntax::Token> Range) { - if (Pending.extendDelayedFold(Range)) - return; - Pending.foldChildrenDelayed(Range, - new (allocator()) syntax::SimpleDeclaration); -} - -void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { +void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { DeclsWithoutSemicolons.insert(D); } @@ -628,31 +1290,55 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { + if (!T) + return; + Pending.assignRole(*T, R); +} + +void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { + assert(N); + setRole(N, R); +} + +void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { + auto *SN = Mapping.find(N); + assert(SN != nullptr); + setRole(SN, R); +} + void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { if (!Child) return; - auto Range = getStmtRange(Child); - // This is an expression in a statement position, consume the trailing - // semicolon and form an 'ExpressionStatement' node. - if (auto *E = dyn_cast<Expr>(Child)) { - Pending.assignRole(getExprRange(E), - NodeRole::ExpressionStatement_expression); - // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. - Pending.foldChildren(Arena, Range, - new (allocator()) syntax::ExpressionStatement); - } - Pending.assignRole(Range, Role); + syntax::Tree *ChildNode; + if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); + ChildNode = new (allocator()) syntax::ExpressionStatement; + // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 
+ Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + } else { + ChildNode = Mapping.find(Child); + } + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; + Child = Child->IgnoreImplicit(); - Pending.assignRole(getExprRange(Child), Role); + syntax::Tree *ChildNode = Mapping.find(Child); + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { + if (L.isInvalid()) + return nullptr; auto It = LocationToToken.find(L.getRawEncoding()); assert(It != LocationToToken.end()); return It->second; diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp index 72458528202e5..24048b297a112 100644 --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -35,7 +35,7 @@ public: assert(!New->isDetached()); assert(Role != NodeRole::Detached); - New->Role = static_cast<unsigned>(Role); + New->setRole(Role); auto *P = Anchor->parent(); P->replaceChildRangeLowLevel(Anchor, Anchor, New); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 5b0c5107c134c..2435ae0a91dd6 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,6 +18,38 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "TranslationUnit"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; + case NodeKind::ParenExpression: + return OS << "ParenExpression"; + case NodeKind::IntegerLiteralExpression: + return OS << "IntegerLiteralExpression"; + case NodeKind::CharacterLiteralExpression: + return OS << "CharacterLiteralExpression"; + case NodeKind::FloatingLiteralExpression: + return OS << "FloatingLiteralExpression"; + case NodeKind::StringLiteralExpression: + return OS << "StringLiteralExpression"; + case NodeKind::BoolLiteralExpression: + return OS << "BoolLiteralExpression"; + case NodeKind::CxxNullPtrExpression: + return OS << "CxxNullPtrExpression"; + case NodeKind::IntegerUserDefinedLiteralExpression: + return OS << "IntegerUserDefinedLiteralExpression"; + case NodeKind::FloatUserDefinedLiteralExpression: + return OS << "FloatUserDefinedLiteralExpression"; + case NodeKind::CharUserDefinedLiteralExpression: + return OS << "CharUserDefinedLiteralExpression"; + case NodeKind::StringUserDefinedLiteralExpression: + return OS << "StringUserDefinedLiteralExpression"; + case NodeKind::PrefixUnaryOperatorExpression: + return OS << "PrefixUnaryOperatorExpression"; + case NodeKind::PostfixUnaryOperatorExpression: + return OS << "PostfixUnaryOperatorExpression"; + case NodeKind::BinaryOperatorExpression: + return OS << "BinaryOperatorExpression"; + case NodeKind::UnqualifiedId: + return OS << "UnqualifiedId"; + case NodeKind::IdExpression: + return OS << "IdExpression"; case NodeKind::UnknownStatement: return OS << "UnknownStatement"; case NodeKind::DeclarationStatement: @@ -58,6 +90,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "LinkageSpecificationDeclaration"; case NodeKind::SimpleDeclaration: return OS << "SimpleDeclaration"; + case NodeKind::TemplateDeclaration: + return OS << "TemplateDeclaration"; + case NodeKind::ExplicitTemplateInstantiation: + return OS << "ExplicitTemplateInstantiation"; case NodeKind::NamespaceDefinition: return OS << "NamespaceDefinition"; case NodeKind::NamespaceAliasDefinition: @@ -68,6 
+104,22 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "UsingDeclaration"; case NodeKind::TypeAliasDeclaration: return OS << "TypeAliasDeclaration"; + case NodeKind::SimpleDeclarator: + return OS << "SimpleDeclarator"; + case NodeKind::ParenDeclarator: + return OS << "ParenDeclarator"; + case NodeKind::ArraySubscript: + return OS << "ArraySubscript"; + case NodeKind::TrailingReturnType: + return OS << "TrailingReturnType"; + case NodeKind::ParametersAndQualifiers: + return OS << "ParametersAndQualifiers"; + case NodeKind::MemberPointer: + return OS << "MemberPointer"; + case NodeKind::NameSpecifier: + return OS << "NameSpecifier"; + case NodeKind::NestedNameSpecifier: + return OS << "NestedNameSpecifier"; } llvm_unreachable("unknown node kind"); } @@ -84,6 +136,12 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "CloseParen"; case syntax::NodeRole::IntroducerKeyword: return OS << "IntroducerKeyword"; + case syntax::NodeRole::LiteralToken: + return OS << "LiteralToken"; + case syntax::NodeRole::ArrowToken: + return OS << "ArrowToken"; + case syntax::NodeRole::ExternKeyword: + return OS << "ExternKeyword"; case syntax::NodeRole::BodyStatement: return OS << "BodyStatement"; case syntax::NodeRole::CaseStatement_value: @@ -94,6 +152,14 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "IfStatement_elseKeyword"; case syntax::NodeRole::IfStatement_elseStatement: return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::OperatorExpression_operatorToken: + return OS << "OperatorExpression_operatorToken"; + case syntax::NodeRole::UnaryOperatorExpression_operand: + return OS << "UnaryOperatorExpression_operand"; + case syntax::NodeRole::BinaryOperatorExpression_leftHandSide: + return OS << "BinaryOperatorExpression_leftHandSide"; + case syntax::NodeRole::BinaryOperatorExpression_rightHandSide: + return OS << "BinaryOperatorExpression_rightHandSide"; case syntax::NodeRole::ReturnStatement_value: return OS << "ReturnStatement_value"; case syntax::NodeRole::ExpressionStatement_expression: @@ -104,10 +170,126 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "StaticAssertDeclaration_condition"; case syntax::NodeRole::StaticAssertDeclaration_message: return OS << "StaticAssertDeclaration_message"; + case syntax::NodeRole::SimpleDeclaration_declarator: + return OS << "SimpleDeclaration_declarator"; + case syntax::NodeRole::TemplateDeclaration_declaration: + return OS << "TemplateDeclaration_declaration"; + case syntax::NodeRole::ExplicitTemplateInstantiation_declaration: + return OS << "ExplicitTemplateInstantiation_declaration"; + case syntax::NodeRole::ArraySubscript_sizeExpression: + return OS << "ArraySubscript_sizeExpression"; + case syntax::NodeRole::TrailingReturnType_declarator: + return OS << "TrailingReturnType_declarator"; + case syntax::NodeRole::ParametersAndQualifiers_parameter: + return OS << "ParametersAndQualifiers_parameter"; + case syntax::NodeRole::ParametersAndQualifiers_trailingReturn: + return OS << "ParametersAndQualifiers_trailingReturn"; + case syntax::NodeRole::IdExpression_id: + return OS << "IdExpression_id"; + case syntax::NodeRole::IdExpression_qualifier: + return OS << "IdExpression_qualifier"; + case syntax::NodeRole::NestedNameSpecifier_specifier: + return OS << "NestedNameSpecifier_specifier"; + case syntax::NodeRole::ParenExpression_subExpression: + return OS << "ParenExpression_subExpression"; } 
llvm_unreachable("invalid role"); } +std::vector<syntax::NameSpecifier *> syntax::NestedNameSpecifier::specifiers() { + std::vector<syntax::NameSpecifier *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + assert(C->role() == syntax::NodeRole::NestedNameSpecifier_specifier); + Children.push_back(llvm::cast<syntax::NameSpecifier>(C)); + } + return Children; +} + +syntax::NestedNameSpecifier *syntax::IdExpression::qualifier() { + return llvm::cast_or_null<syntax::NestedNameSpecifier>( + findChild(syntax::NodeRole::IdExpression_qualifier)); +} + +syntax::UnqualifiedId *syntax::IdExpression::unqualifiedId() { + return llvm::cast_or_null<syntax::UnqualifiedId>( + findChild(syntax::NodeRole::IdExpression_id)); +} + +syntax::Leaf *syntax::ParenExpression::openParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ParenExpression::subExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ParenExpression_subExpression)); +} + +syntax::Leaf *syntax::ParenExpression::closeParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::IntegerLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CharacterLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::FloatingLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::StringLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::BoolLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::lhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); +} + +syntax::Leaf *syntax::UnaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::UnaryOperatorExpression::operand() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::UnaryOperatorExpression_operand)); +} + +syntax::Leaf *syntax::BinaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::rhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_rightHandSide)); +} + syntax::Leaf *syntax::SwitchStatement::switchKeyword() { return llvm::cast_or_null<syntax::Leaf>( findChild(syntax::NodeRole::IntroducerKeyword)); @@ -226,8 +408,8 @@ syntax::Leaf *syntax::CompoundStatement::lbrace() { std::vector<syntax::Statement *> syntax::CompoundStatement::statements() { 
std::vector<syntax::Statement *> Children; for (auto *C = firstChild(); C; C = C->nextSibling()) { - if (C->role() == syntax::NodeRole::CompoundStatement_statement) - Children.push_back(llvm::cast<syntax::Statement>(C)); + assert(C->role() == syntax::NodeRole::CompoundStatement_statement); + Children.push_back(llvm::cast<syntax::Statement>(C)); } return Children; } @@ -246,3 +428,98 @@ syntax::Expression *syntax::StaticAssertDeclaration::message() { return llvm::cast_or_null<syntax::Expression>( findChild(syntax::NodeRole::StaticAssertDeclaration_message)); } + +std::vector<syntax::SimpleDeclarator *> +syntax::SimpleDeclaration::declarators() { + std::vector<syntax::SimpleDeclarator *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::SimpleDeclaration_declarator) + Children.push_back(llvm::cast<syntax::SimpleDeclarator>(C)); + } + return Children; +} + +syntax::Leaf *syntax::TemplateDeclaration::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Declaration *syntax::TemplateDeclaration::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::TemplateDeclaration_declaration)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::externKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ExternKeyword)); +} + +syntax::Declaration *syntax::ExplicitTemplateInstantiation::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::ExplicitTemplateInstantiation_declaration)); +} + +syntax::Leaf *syntax::ParenDeclarator::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Leaf *syntax::ParenDeclarator::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::ArraySubscript::lbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ArraySubscript::sizeExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ArraySubscript_sizeExpression)); +} + +syntax::Leaf *syntax::ArraySubscript::rbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::TrailingReturnType::arrowToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ArrowToken)); +} + +syntax::SimpleDeclarator *syntax::TrailingReturnType::declarator() { + return llvm::cast_or_null<syntax::SimpleDeclarator>( + findChild(syntax::NodeRole::TrailingReturnType_declarator)); +} + +syntax::Leaf *syntax::ParametersAndQualifiers::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector<syntax::SimpleDeclaration *> +syntax::ParametersAndQualifiers::parameters() { + std::vector<syntax::SimpleDeclaration *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::ParametersAndQualifiers_parameter) + Children.push_back(llvm::cast<syntax::SimpleDeclaration>(C)); + } + return Children; +} + +syntax::Leaf *syntax::ParametersAndQualifiers::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + 
findChild(syntax::NodeRole::CloseParen)); +} + +syntax::TrailingReturnType *syntax::ParametersAndQualifiers::trailingReturn() { + return llvm::cast_or_null<syntax::TrailingReturnType>( + findChild(syntax::NodeRole::ParametersAndQualifiers_trailingReturn)); +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 3df1c064923a1..c6b904822b8b5 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -35,6 +35,69 @@ using namespace clang; using namespace clang::syntax; +namespace { +// Finds the smallest consecutive subsuquence of Toks that covers R. +llvm::ArrayRef<syntax::Token> +getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, + const SourceManager &SM) { + if (R.isInvalid()) + return {}; + const syntax::Token *Begin = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const syntax::Token *End = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + +// Finds the smallest expansion range that contains expanded tokens First and +// Last, e.g.: +// #define ID(x) x +// ID(ID(ID(a1) a2)) +// ~~ -> a1 +// ~~ -> a2 +// ~~~~~~~~~ -> a1 a2 +SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, + const syntax::Token &Last, + const SourceManager &SM) { + SourceRange Res; + auto FirstLoc = First.location(), LastLoc = Last.location(); + // Keep traversing up the spelling chain as longs as tokens are part of the + // same expansion. + while (!FirstLoc.isFileID() && !LastLoc.isFileID()) { + auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); + auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); + // Stop if expansions have diverged. + if (ExpInfoFirst.getExpansionLocStart() != + ExpInfoLast.getExpansionLocStart()) + break; + // Do not continue into macro bodies. + if (!ExpInfoFirst.isMacroArgExpansion() || + !ExpInfoLast.isMacroArgExpansion()) + break; + FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); + LastLoc = SM.getImmediateSpellingLoc(LastLoc); + // Update the result afterwards, as we want the tokens that triggered the + // expansion. + Res = {FirstLoc, LastLoc}; + } + // Normally mapping back to expansion location here only changes FileID, as + // we've already found some tokens expanded from the same macro argument, and + // they should map to a consecutive subset of spelled tokens. Unfortunately + // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations + // based on their FileID in addition to offsets. So even though we are + // referring to same tokens, SourceManager might tell us that one is before + // the other if they've got different FileIDs. 
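getTokensCovering above relies on llvm::partition_point doing two binary searches over tokens sorted by source location. Below is the same idiom on plain integer offsets as a self-contained sketch; the function name and the use of raw offsets are illustrative assumptions, not part of this patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Smallest consecutive subsequence of Offsets (sorted ascending) that covers
// the closed range [Begin, End], mirroring getTokensCovering.
llvm::ArrayRef<unsigned> coveringRange(llvm::ArrayRef<unsigned> Offsets,
                                       unsigned Begin, unsigned End) {
  const unsigned *First =
      llvm::partition_point(Offsets, [&](unsigned O) { return O < Begin; });
  const unsigned *Last =
      llvm::partition_point(Offsets, [&](unsigned O) { return O <= End; });
  if (First > Last)
    return {};
  return {First, Last};
}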
+ return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); +} + +} // namespace + syntax::Token::Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind) : Location(Location), Length(Length), Kind(Kind) { @@ -67,7 +130,8 @@ FileRange syntax::Token::range(const SourceManager &SM, auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens"); + assert((F == L || F.endOffset() <= L.beginOffset()) && + "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -120,19 +184,7 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { } llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { - if (R.isInvalid()) - return {}; - const Token *Begin = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); - }); - const Token *End = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); - }); - if (Begin > End) - return {}; - return {Begin, End}; + return getTokensCovering(expandedTokens(), R, *SourceMgr); } CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { @@ -161,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr}; + return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], + /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. // Check if the token is part of the mapping. if (ExpandedIndex < It->EndExpanded) - return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It}; + return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. return { &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping*/ nullptr}; + /*Mapping=*/nullptr}; +} + +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { + if (Spelled.empty()) + return {}; + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + const MarkedFile &File = It->second; + // `Spelled` must be a subrange of `File.SpelledTokens`. 
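The index arithmetic in spelledForExpandedToken above (and the bounds logic of expandedForSpelled that starts here) is easiest to see on a concrete mapping. The numbers below are a hypothetical illustration, not taken from a real token buffer.

// One file, ten spelled tokens, one mapping
//   M = {BeginSpelled = 2, EndSpelled = 6, BeginExpanded = 2, EndExpanded = 3}
// i.e. spelled tokens [2, 6) were a macro call producing a single expanded
// token at index 2 (assume File.BeginExpanded == 0).
//   ExpandedIndex = 1: before M, no offset change -> SpelledTokens[1]
//   ExpandedIndex = 2: inside M -> SpelledTokens[M.BeginSpelled] == SpelledTokens[2]
//   ExpandedIndex = 4: after M  -> SpelledTokens[M.EndSpelled + (4 - M.EndExpanded)]
//                               == SpelledTokens[6 + 1] == SpelledTokens[7]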
+ assert(File.SpelledTokens.data() <= Spelled.data()); + assert(&Spelled.back() <= + File.SpelledTokens.data() + File.SpelledTokens.size()); +#ifndef NDEBUG + auto T1 = Spelled.back().location(); + auto T2 = File.SpelledTokens.back().location(); + assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); +#endif + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + assert(SpelledFrontI < File.SpelledTokens.size()); + unsigned ExpandedBegin; + if (!FrontMapping) { + // No mapping that starts before the first token of Spelled, we don't have + // to modify offsets. + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledFrontI != FrontMapping->BeginSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + // Spelled tokens start at the beginning of this mapping. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + // Spelled tokens start after the mapping ends (they start in the hole + // between 2 mappings, or between a mapping and end of the file). + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + // No mapping that starts before the last token of Spelled, we don't have to + // modify offsets. + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledBackI + 1 != BackMapping->EndSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + ExpandedEnd = BackMapping->EndExpanded; + } else { + // Spelled tokens end after the mapping ends. + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. 
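A hedged usage sketch for expandedForSpelled; Buffer, FID and the choice of sub-range are assumptions made for illustration only.

// Hypothetical caller asking "what did these file tokens expand to?".
llvm::ArrayRef<syntax::Token> AllSpelled = Buffer.spelledTokens(FID);
llvm::ArrayRef<syntax::Token> Sub = AllSpelled.take_front(3); // any contiguous slice
for (llvm::ArrayRef<syntax::Token> Run : Buffer.expandedForSpelled(Sub)) {
  // Each Run is a contiguous slice of Buffer.expandedTokens(); per the FIXMEs
  // above, a slice covering only part of a macro call yields an empty result.
  for (const syntax::Token &T : Run)
    llvm::errs() << T.text(Buffer.sourceManager()) << " ";
}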
+ if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { @@ -182,9 +324,20 @@ llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { return It->second.SpelledTokens; } +const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const { + assert(Loc.isFileID()); + const auto *Tok = llvm::partition_point( + spelledTokens(SourceMgr->getFileID(Loc)), + [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + if (!Tok || Tok->location() != Loc) + return nullptr; + return Tok; +} + std::string TokenBuffer::Mapping::str() const { - return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", - BeginSpelled, EndSpelled, BeginExpanded, EndExpanded); + return std::string( + llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", + BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); } llvm::Optional<llvm::ArrayRef<syntax::Token>> @@ -194,8 +347,6 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { if (Expanded.empty()) return llvm::None; - // FIXME: also allow changes uniquely mapping to macro arguments. - const syntax::Token *BeginSpelled; const Mapping *BeginMapping; std::tie(BeginSpelled, BeginMapping) = @@ -213,12 +364,28 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { const MarkedFile &File = Files.find(FID)->second; - // Do not allow changes that cross macro expansion boundaries. + // If both tokens are coming from a macro argument expansion, try and map to + // smallest part of the macro argument. BeginMapping && LastMapping check is + // only for performance, they are a prerequisite for Expanded.front() and + // Expanded.back() being part of a macro arg expansion. + if (BeginMapping && LastMapping && + SourceMgr->isMacroArgExpansion(Expanded.front().location()) && + SourceMgr->isMacroArgExpansion(Expanded.back().location())) { + auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), + Expanded.back(), *SourceMgr); + // It might be the case that tokens are arguments of different macro calls, + // in that case we should continue with the logic below instead of returning + // an empty range. + if (CommonRange.isValid()) + return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); + } + + // Do not allow changes that doesn't cover full expansion. unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); - if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded) + if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded) return llvm::None; - if (LastMapping && EndExpanded < LastMapping->EndExpanded) + if (LastMapping && LastMapping->EndExpanded != EndExpanded) return llvm::None; // All is good, return the result. 
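spelledForExpanded is the opposite direction, typically used by editing clients to decide whether a change computed on expanded tokens can be applied to the file. A minimal sketch follows; Buffer and SomeRange are assumed to exist and are not part of this patch.

// Hypothetical caller mapping a range of expanded tokens back to file tokens.
llvm::ArrayRef<syntax::Token> Expanded = Buffer.expandedTokens(SomeRange);
if (llvm::Optional<llvm::ArrayRef<syntax::Token>> Spelled =
        Buffer.spelledForExpanded(Expanded)) {
  // Safe to rewrite the text covered by Spelled->front() .. Spelled->back().
  // With this patch, a range lying entirely inside one macro argument also
  // maps back, to the spelled tokens of that argument.
} else {
  // The range covers only part of a macro expansion; no file edit is possible.
}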
return llvm::makeArrayRef( @@ -253,24 +420,30 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { ExpandedTokens.data() + M->EndExpanded); return E; } - llvm::ArrayRef<syntax::Token> syntax::spelledTokensTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { assert(Loc.isFileID()); - llvm::ArrayRef<syntax::Token> All = - Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)); + auto *Right = llvm::partition_point( - All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); - bool AcceptRight = Right != All.end() && Right->location() <= Loc; - bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc; + Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; + bool AcceptLeft = + Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), Right + (AcceptRight ? 1 : 0)); } +llvm::ArrayRef<syntax::Token> +syntax::spelledTokensTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledTokensTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + const syntax::Token * syntax::spelledIdentifierTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { if (Tok.kind() == tok::identifier) return &Tok; @@ -278,6 +451,13 @@ syntax::spelledIdentifierTouching(SourceLocation Loc, return nullptr; } +const syntax::Token * +syntax::spelledIdentifierTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledIdentifierTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + std::vector<const syntax::Token *> TokenBuffer::macroExpansions(FileID FID) const { auto FileIt = Files.find(FID); @@ -293,7 +473,8 @@ TokenBuffer::macroExpansions(FileID FID) const { return Expansions; } -std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, +std::vector<syntax::Token> syntax::tokenize(const FileRange &FR, + const SourceManager &SM, const LangOptions &LO) { std::vector<syntax::Token> Tokens; IdentifierTable Identifiers(LO); @@ -308,18 +489,28 @@ std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, Tokens.push_back(syntax::Token(T)); }; - Lexer L(FID, SM.getBuffer(FID), SM, LO); + auto SrcBuffer = SM.getBufferData(FR.file()); + Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(), + SrcBuffer.data() + FR.beginOffset(), + // We can't make BufEnd point to FR.endOffset, as Lexer requires a + // null terminated buffer. + SrcBuffer.data() + SrcBuffer.size()); clang::Token T; - while (!L.LexFromRawLexer(T)) + while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset()) AddToken(T); - // 'eof' is only the last token if the input is null-terminated. Never store - // it, for consistency. - if (T.getKind() != tok::eof) + // LexFromRawLexer returns true when it parses the last token of the file, add + // it iff it starts within the range we are interested in. 
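The new FileRange overload of tokenize lexes only a slice of a file. A short usage sketch; FID, BeginOffset and EndOffset are placeholders the caller is assumed to know.

// Hypothetical caller lexing part of a file into raw syntax::Tokens.
syntax::FileRange Range(FID, BeginOffset, EndOffset);
std::vector<syntax::Token> Slice = syntax::tokenize(Range, SM, LangOpts);
// The FileID overload below is now a thin wrapper covering the whole file:
std::vector<syntax::Token> Whole = syntax::tokenize(FID, SM, LangOpts);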
+ if (SM.getFileOffset(T.getLocation()) < FR.endOffset()) AddToken(T); return Tokens; } +std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, + const LangOptions &LO) { + return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO); +} + /// Records information reqired to construct mappings for the token buffer that /// we are collecting. class TokenCollector::CollectPPExpansions : public PPCallbacks { @@ -335,14 +526,38 @@ public: SourceRange Range, const MacroArgs *Args) override { if (!Collector) return; - // Only record top-level expansions, not those where: + const auto &SM = Collector->PP.getSourceManager(); + // Only record top-level expansions that directly produce expanded tokens. + // This excludes those where: // - the macro use is inside a macro body, // - the macro appears in an argument to another macro. - if (!MacroNameTok.getLocation().isFileID() || - (LastExpansionEnd.isValid() && - Collector->PP.getSourceManager().isBeforeInTranslationUnit( - Range.getBegin(), LastExpansionEnd))) + // However macro expansion isn't really a tree, it's token rewrite rules, + // so there are other cases, e.g. + // #define B(X) X + // #define A 1 + B + // A(2) + // Both A and B produce expanded tokens, though the macro name 'B' comes + // from an expansion. The best we can do is merge the mappings for both. + + // The *last* token of any top-level macro expansion must be in a file. + // (In the example above, see the closing paren of the expansion of B). + if (!Range.getEnd().isFileID()) + return; + // If there's a current expansion that encloses this one, this one can't be + // top-level. + if (LastExpansionEnd.isValid() && + !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) return; + + // If the macro invocation (B) starts in a macro (A) but ends in a file, + // we'll create a merged mapping for A + B by overwriting the endpoint for + // A's startpoint. + if (!Range.getBegin().isFileID()) { + Range.setBegin(SM.getExpansionLoc(Range.getBegin())); + assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) && + "Overlapping macros should have same expansion location"); + } + Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd(); LastExpansionEnd = Range.getEnd(); } @@ -399,197 +614,180 @@ public: } TokenBuffer build() && { - buildSpelledTokens(); - - // Walk over expanded tokens and spelled tokens in parallel, building the - // mappings between those using source locations. - // To correctly recover empty macro expansions, we also take locations - // reported to PPCallbacks::MacroExpands into account as we do not have any - // expanded tokens with source locations to guide us. - - // The 'eof' token is special, it is not part of spelled token stream. We - // handle it separately at the end. assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); - for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) { - // (!) I might be updated by the following call. - processExpandedToken(I); - } - // 'eof' not handled in the loop, do it here. - assert(SM.getMainFileID() == - SM.getFileID(Result.ExpandedTokens.back().location())); - fillGapUntil(Result.Files[SM.getMainFileID()], - Result.ExpandedTokens.back().location(), - Result.ExpandedTokens.size() - 1); - Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size(); + // Tokenize every file that contributed tokens to the expanded stream. 
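To make the merged-mapping rule in MacroExpands above concrete, here is roughly what the collector records for the example from its comment; the spelled-out map contents are an illustration, not output from a real run.

//   #define B(X) X
//   #define A 1 + B
//   A(2)
// MacroExpands fires first for A (begin and end at the file token `A`), then
// for B, whose name comes from A's expansion but whose `(2)` closes in the
// file. B's begin is rewritten to A's expansion location, so the two
// invocations share a single entry, roughly:
//   Expansions[location of `A` in "A(2)"] = location of `)` in "A(2)"
// discard()/advance() below consume this via CollectedExpansions.lookup().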
+ buildSpelledTokens(); - // Some files might have unaccounted spelled tokens at the end, add an empty - // mapping for those as they did not have expanded counterparts. - fillGapsAtEndOfFiles(); + // The expanded token stream consists of runs of tokens that came from + // the same source (a macro expansion, part of a file etc). + // Between these runs are the logical positions of spelled tokens that + // didn't expand to anything. + while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { + // Create empty mappings for spelled tokens that expanded to nothing here. + // May advance NextSpelled, but NextExpanded is unchanged. + discard(); + // Create mapping for a contiguous run of expanded tokens. + // Advances NextExpanded past the run, and NextSpelled accordingly. + unsigned OldPosition = NextExpanded; + advance(); + if (NextExpanded == OldPosition) + diagnoseAdvanceFailure(); + } + // If any tokens remain in any of the files, they didn't expand to anything. + // Create empty mappings up until the end of the file. + for (const auto &File : Result.Files) + discard(File.first); + +#ifndef NDEBUG + for (auto &pair : Result.Files) { + auto &mappings = pair.second.Mappings; + assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1, + const TokenBuffer::Mapping &M2) { + return M1.BeginSpelled < M2.BeginSpelled && + M1.EndSpelled < M2.EndSpelled && + M1.BeginExpanded < M2.BeginExpanded && + M1.EndExpanded < M2.EndExpanded; + })); + } +#endif return std::move(Result); } private: - /// Process the next token in an expanded stream and move corresponding - /// spelled tokens, record any mapping if needed. - /// (!) \p I will be updated if this had to skip tokens, e.g. for macros. - void processExpandedToken(unsigned &I) { - auto L = Result.ExpandedTokens[I].location(); - if (L.isMacroID()) { - processMacroExpansion(SM.getExpansionRange(L), I); - return; + // Consume a sequence of spelled tokens that didn't expand to anything. + // In the simplest case, skips spelled tokens until finding one that produced + // the NextExpanded token, and creates an empty mapping for them. + // If Drain is provided, skips remaining tokens from that file instead. + void discard(llvm::Optional<FileID> Drain = llvm::None) { + SourceLocation Target = + Drain ? SM.getLocForEndOfFile(*Drain) + : SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()); + FileID File = SM.getFileID(Target); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + TokenBuffer::Mapping Mapping; + Mapping.BeginSpelled = NextSpelled; + // When dropping trailing tokens from a file, the empty mapping should + // be positioned within the file's expanded-token range (at the end). + Mapping.BeginExpanded = Mapping.EndExpanded = + Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; + // We may want to split into several adjacent empty mappings. + // FlushMapping() emits the current mapping and starts a new one. + auto FlushMapping = [&, this] { + Mapping.EndSpelled = NextSpelled; + if (Mapping.BeginSpelled != Mapping.EndSpelled) + Result.Files[File].Mappings.push_back(Mapping); + Mapping.BeginSpelled = NextSpelled; + }; + + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() < Target) { + // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) + // then we want to partition our (empty) mapping. 
+ // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) + SourceLocation KnownEnd = CollectedExpansions.lookup( + SpelledTokens[NextSpelled].location().getRawEncoding()); + if (KnownEnd.isValid()) { + FlushMapping(); // Emits [Start, NextSpelled) + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= KnownEnd) + ++NextSpelled; + FlushMapping(); // Emits [NextSpelled, KnownEnd] + // Now the loop contitues and will emit (KnownEnd, Target). + } else { + ++NextSpelled; + } } - if (L.isFileID()) { - auto FID = SM.getFileID(L); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, L, I); + FlushMapping(); + } - // Skip the token. - assert(File.SpelledTokens[NextSpelled[FID]].location() == L && - "no corresponding token in the spelled stream"); - ++NextSpelled[FID]; - return; + // Consumes the NextExpanded token and others that are part of the same run. + // Increases NextExpanded and NextSpelled by at least one, and adds a mapping + // (unless this is a run of file tokens, which we represent with no mapping). + void advance() { + const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; + SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); + FileID File = SM.getFileID(Expansion); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + if (Tok.location().isFileID()) { + // A run of file tokens continues while the expanded/spelled tokens match. + while (NextSpelled < SpelledTokens.size() && + NextExpanded < Result.ExpandedTokens.size() && + SpelledTokens[NextSpelled].location() == + Result.ExpandedTokens[NextExpanded].location()) { + ++NextSpelled; + ++NextExpanded; + } + // We need no mapping for file tokens copied to the expanded stream. + } else { + // We found a new macro expansion. We should have its spelling bounds. + auto End = CollectedExpansions.lookup(Expansion.getRawEncoding()); + assert(End.isValid() && "Macro expansion wasn't captured?"); + + // Mapping starts here... + TokenBuffer::Mapping Mapping; + Mapping.BeginExpanded = NextExpanded; + Mapping.BeginSpelled = NextSpelled; + // ... consumes spelled tokens within bounds we captured ... + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= End) + ++NextSpelled; + // ... consumes expanded tokens rooted at the same expansion ... + while (NextExpanded < Result.ExpandedTokens.size() && + SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()) == Expansion) + ++NextExpanded; + // ... and ends here. + Mapping.EndExpanded = NextExpanded; + Mapping.EndSpelled = NextSpelled; + Result.Files[File].Mappings.push_back(Mapping); } } - /// Skipped expanded and spelled tokens of a macro expansion that covers \p - /// SpelledRange. Add a corresponding mapping. - /// (!) \p I will be the index of the last token in an expansion after this - /// function returns. - void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) { - auto FID = SM.getFileID(SpelledRange.getBegin()); - assert(FID == SM.getFileID(SpelledRange.getEnd())); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, SpelledRange.getBegin(), I); - - // Skip all expanded tokens from the same macro expansion. 
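A worked example of what discard() and advance() above produce for a small file; the token indices and mapping values are a hypothetical illustration rather than output from a real run.

// Input (single file):
//   #define ID(x) x
//   ID(1) + 2;
// Spelled tokens:  # define ID ( x ) x  ID ( 1 )  + 2 ;   (indices 0..13)
// Expanded tokens: 1 + 2 ;                                (indices 0..3, then eof)
// discard() emits an empty mapping for the directive, which expands to nothing:
//   {BeginSpelled = 0, EndSpelled = 7, BeginExpanded = 0, EndExpanded = 0}
// advance() emits a mapping for the macro call ID(1):
//   {BeginSpelled = 7, EndSpelled = 11, BeginExpanded = 0, EndExpanded = 1}
// The trailing file tokens `+ 2 ;` match the expanded stream one-to-one and
// need no mapping at all.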
- unsigned BeginExpanded = I; - for (; I + 1 < Result.ExpandedTokens.size(); ++I) { - auto NextL = Result.ExpandedTokens[I + 1].location(); - if (!NextL.isMacroID() || - SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) - break; + // advance() is supposed to consume at least one token - if not, we crash. + void diagnoseAdvanceFailure() { +#ifndef NDEBUG + // Show the failed-to-map token in context. + for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; + I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { + const char *L = + (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; + llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; } - unsigned EndExpanded = I + 1; - consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, - EndExpanded, NextSpelled[FID]); +#endif + llvm_unreachable("Couldn't map expanded token to spelled tokens!"); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded /// ranges for each of the files. void buildSpelledTokens() { for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { - auto FID = - SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location())); + const auto &Tok = Result.ExpandedTokens[I]; + auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); auto It = Result.Files.try_emplace(FID); TokenBuffer::MarkedFile &File = It.first->second; - File.EndExpanded = I + 1; + // The eof token should not be considered part of the main-file's range. + File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; + if (!It.second) continue; // we have seen this file before. - // This is the first time we see this file. File.BeginExpanded = I; File.SpelledTokens = tokenize(FID, SM, LangOpts); } } - void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned ExpandedIndex, unsigned &SpelledIndex) { - consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); - } - - /// Consumes spelled tokens that form a macro expansion and adds a entry to - /// the resulting token buffer. - /// (!) SpelledIndex is updated in-place. - void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned BeginExpanded, unsigned EndExpanded, - unsigned &SpelledIndex) { - // We need to record this mapping before continuing. - unsigned MappingBegin = SpelledIndex; - ++SpelledIndex; - - bool HitMapping = - tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); - (void)HitMapping; - assert(!HitMapping && "recursive macro expansion?"); - - TokenBuffer::Mapping M; - M.BeginExpanded = BeginExpanded; - M.EndExpanded = EndExpanded; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - - File.Mappings.push_back(M); - } - - /// Consumes spelled tokens until location \p L is reached and adds a mapping - /// covering the consumed tokens. The mapping will point to an empty expanded - /// range at position \p ExpandedIndex. 
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, - unsigned ExpandedIndex) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); - - unsigned &SpelledIndex = NextSpelled[FID]; - unsigned MappingBegin = SpelledIndex; - while (true) { - auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); - if (SpelledIndex != MappingBegin) { - TokenBuffer::Mapping M; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); - } - if (!EndLoc) - break; - consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, - SpelledIndex); - - MappingBegin = SpelledIndex; - } - }; - - /// Consumes spelled tokens until it reaches Offset or a mapping boundary, - /// i.e. a name of a macro expansion or the start '#' token of a PP directive. - /// (!) NextSpelled is updated in place. - /// - /// returns None if \p Offset was reached, otherwise returns the end location - /// of a mapping that starts at \p NextSpelled. - llvm::Optional<SourceLocation> - tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, - unsigned &NextSpelled) { - for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { - auto L = File.SpelledTokens[NextSpelled].location(); - if (Offset <= SM.getFileOffset(L)) - return llvm::None; // reached the offset we are looking for. - auto Mapping = CollectedExpansions.find(L.getRawEncoding()); - if (Mapping != CollectedExpansions.end()) - return Mapping->second; // found a mapping before the offset. - } - return llvm::None; // no more tokens, we "reached" the offset. - } - - /// Adds empty mappings for unconsumed spelled tokens at the end of each file. - void fillGapsAtEndOfFiles() { - for (auto &F : Result.Files) { - if (F.second.SpelledTokens.empty()) - continue; - fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), - F.second.EndExpanded); - } - } - TokenBuffer Result; - /// For each file, a position of the next spelled token we will consume. 
- llvm::DenseMap<FileID, unsigned> NextSpelled; + unsigned NextExpanded = 0; // cursor in ExpandedTokens + llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens PPExpansions CollectedExpansions; const SourceManager &SM; const LangOptions &LangOpts; @@ -604,19 +802,20 @@ TokenBuffer TokenCollector::consume() && { } std::string syntax::Token::str() const { - return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()), - length()); + return std::string(llvm::formatv("Token({0}, length = {1})", + tok::getTokenName(kind()), length())); } std::string syntax::Token::dumpForTests(const SourceManager &SM) const { - return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM)); + return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM), + tok::getTokenName(kind()), length())); } std::string TokenBuffer::dumpForTests() const { auto PrintToken = [this](const syntax::Token &T) -> std::string { if (T.kind() == tok::eof) return "<eof>"; - return T.text(*SourceMgr); + return std::string(T.text(*SourceMgr)); }; auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS, diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 9a6270ec4cce3..37579e6145b65 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -58,22 +58,33 @@ bool syntax::Leaf::classof(const Node *N) { syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)), - Role(static_cast<unsigned>(NodeRole::Detached)), Original(false), - CanModify(false) {} + Role(0), Original(false), CanModify(false) { + this->setRole(NodeRole::Detached); +} bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } +void syntax::Node::setRole(NodeRole NR) { + this->Role = static_cast<unsigned>(NR); +} + bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { - assert(Child->Parent == nullptr); - assert(Child->NextSibling == nullptr); assert(Child->role() == NodeRole::Detached); assert(Role != NodeRole::Detached); + Child->setRole(Role); + prependChildLowLevel(Child); +} + +void syntax::Tree::prependChildLowLevel(Node *Child) { + assert(Child->Parent == nullptr); + assert(Child->NextSibling == nullptr); + assert(Child->role() != NodeRole::Detached); + Child->Parent = this; Child->NextSibling = this->FirstChild; - Child->Role = static_cast<unsigned>(Role); this->FirstChild = Child; } @@ -94,7 +105,7 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, N != End;) { auto *Next = N->NextSibling; - N->Role = static_cast<unsigned>(NodeRole::Detached); + N->setRole(NodeRole::Detached); N->Parent = nullptr; N->NextSibling = nullptr; if (N->Original) |