diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /clang/lib/Tooling/Syntax | |
parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) |
Notes
Diffstat (limited to 'clang/lib/Tooling/Syntax')
-rw-r--r-- | clang/lib/Tooling/Syntax/BuildTree.cpp | 501 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/ComputeReplacements.cpp | 126 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Mutations.cpp | 98 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Nodes.cpp | 221 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Synthesis.cpp | 45 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Tokens.cpp | 48 | ||||
-rw-r--r-- | clang/lib/Tooling/Syntax/Tree.cpp | 142 |
7 files changed, 1105 insertions, 76 deletions
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index a0b653df133d..aa8844771d37 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/Basic/LLVM.h" @@ -21,12 +23,17 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include <map> using namespace clang; +LLVM_ATTRIBUTE_UNUSED +static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -44,7 +51,10 @@ using namespace clang; /// Call finalize() to finish building the tree and consume the root node. class syntax::TreeBuilder { public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {} + TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { + for (const auto &T : Arena.tokenBuffer().expandedTokens()) + LocationToToken.insert({T.location().getRawEncoding(), &T}); + } llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } @@ -52,8 +62,25 @@ public: /// Range. void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); + /// Must be called with the range of each `DeclaratorDecl`. Ensures the + /// corresponding declarator nodes are covered by `SimpleDeclaration`. + void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); + + /// Notifies that we should not consume trailing semicolon when computing + /// token range of \p D. + void noticeDeclaratorWithoutSemicolon(Decl *D); + + /// Mark the \p Child node with a corresponding \p Role. All marked children + /// should be consumed by foldNode. + /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. + void markStmtChild(Stmt *Child, NodeRole Role); + /// Should be called for expressions in non-statement position to avoid + /// wrapping into expression statement. + void markExprChild(Expr *Child, NodeRole Role); + /// Set role for a token starting at \p Loc. - void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); + void markChildToken(SourceLocation Loc, NodeRole R); /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { @@ -62,10 +89,12 @@ public: assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Tokens.drop_back(), + Pending.foldChildren(Arena, Tokens.drop_back(), new (Arena.allocator()) syntax::TranslationUnit); - return cast<syntax::TranslationUnit>(std::move(Pending).finalize()); + auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); + TU->assertInvariantsRecursive(); + return TU; } /// getRange() finds the syntax tokens corresponding to the passed source @@ -81,13 +110,43 @@ public: return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { - return getRange(D->getBeginLoc(), D->getEndLoc()); + auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); + if (llvm::isa<NamespaceDecl>(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. + return withTrailingSemicolon(Tokens); } - llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const { - return getRange(S->getBeginLoc(), S->getEndLoc()); + llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { + return getRange(E->getBeginLoc(), E->getEndLoc()); + } + /// Find the adjusted range for the statement, consuming the trailing + /// semicolon when needed. + llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { + auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + if (isa<CompoundStmt>(S)) + return Tokens; + + // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and + // all statements that end with those. Consume this semicolon here. + if (Tokens.back().kind() == tok::semi) + return Tokens; + return withTrailingSemicolon(Tokens); } private: + llvm::ArrayRef<syntax::Token> + withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { + assert(!Tokens.empty()); + assert(Tokens.back().kind() != tok::eof); + // (!) we never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) + return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); + return Tokens; + } + /// Finds a token starting at \p L. The token must exist. const syntax::Token *findToken(SourceLocation L) const; @@ -103,11 +162,16 @@ private: assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) - Trees.insert(Trees.end(), - {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); + for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { + auto *L = new (A.allocator()) syntax::Leaf(&T); + L->Original = true; + L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); + Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + } } + ~Forest() { assert(DelayedFolds.empty()); } + void assignRole(llvm::ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -120,30 +184,47 @@ private: It->second.Role = Role; } - /// Add \p Node to the forest and fill its children nodes based on the \p - /// NodeRange. - void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens, + /// Add \p Node to the forest and attach child nodes based on \p Tokens. + void foldChildren(const syntax::Arena &A, + llvm::ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { - assert(!NodeTokens.empty()); - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = NodeTokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert(BeginChildren != Trees.end() && - BeginChildren->first == FirstToken && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(NodeTokens.end()); - assert((EndChildren == Trees.end() || - EndChildren->first == NodeTokens.end()) && - "fold crosses boundaries of existing subtrees"); + // Execute delayed folds inside `Tokens`. + auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); + auto It = BeginExecuted; + for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) + foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), + It->second.Node); + DelayedFolds.erase(BeginExecuted, It); + + // Attach children to `Node`. + foldChildrenEager(A, Tokens, Node); + } - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); + /// Schedule a call to `foldChildren` that will only be executed when + /// containing node is folded. The range of delayed nodes can be extended by + /// calling `extendDelayedFold`. Only one delayed node for each starting + /// token is allowed. + void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, + syntax::Tree *Node) { + assert(!Tokens.empty()); + bool Inserted = + DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) + .second; + (void)Inserted; + assert(Inserted && "Multiple delayed folds start at the same token"); + } - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); + /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends + /// its endpoint to `ExtendedRange.end()` and returns true. + /// Otherwise, returns false. + bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { + assert(!ExtendedRange.empty()); + auto It = DelayedFolds.find(ExtendedRange.data()); + if (It == DelayedFolds.end()) + return false; + assert(It->second.End <= ExtendedRange.end()); + It->second.End = ExtendedRange.end(); + return true; } // EXPECTS: all tokens were consumed and are owned by a single root node. @@ -171,6 +252,35 @@ private: } private: + /// Implementation detail of `foldChildren`, does acutal folding ignoring + /// delayed folds. + void foldChildrenEager(const syntax::Arena &A, + llvm::ArrayRef<syntax::Token> Tokens, + syntax::Tree *Node) { + assert(Node->firstChild() == nullptr && "node already has children"); + + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // (!) we need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) + Node->prependChildLowLevel(std::prev(It)->second.Node, + std::prev(It)->second.Role); + + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, NodeAndRole(Node)}); + } /// A with a role that should be assigned to it when adding to a parent. struct NodeAndRole { explicit NodeAndRole(syntax::Node *Node) @@ -181,16 +291,29 @@ private: }; /// Maps from the start token to a subtree starting at that token. + /// Keys in the map are pointers into the array of expanded tokens, so + /// pointer order corresponds to the order of preprocessor tokens. /// FIXME: storing the end tokens is redundant. /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. std::map<const syntax::Token *, NodeAndRole> Trees; + + /// See documentation of `foldChildrenDelayed` for details. + struct DelayedFold { + const syntax::Token *End = nullptr; + syntax::Tree *Node = nullptr; + }; + std::map<const syntax::Token *, DelayedFold> DelayedFolds; }; /// For debugging purposes. std::string str() { return Pending.str(Arena); } syntax::Arena &Arena; + /// To quickly find tokens by their start location. + llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> + LocationToToken; Forest Pending; + llvm::DenseSet<Decl *> DeclsWithoutSemicolons; }; namespace { @@ -201,20 +324,34 @@ public: bool shouldTraversePostOrder() const { return true; } - bool TraverseDecl(Decl *D) { - if (!D || isa<TranslationUnitDecl>(D)) - return RecursiveASTVisitor::TraverseDecl(D); - if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext())) - return true; // Only build top-level decls for now, do not recurse. - return RecursiveASTVisitor::TraverseDecl(D); + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { + // Ensure declarators are covered by SimpleDeclaration. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; + } + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { + // Also a declarator. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; } bool VisitDecl(Decl *D) { - assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) && - "expected a top-level decl"); assert(!D->isImplicit()); Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::TopLevelDeclaration()); + new (allocator()) syntax::UnknownDeclaration()); + return true; + } + + bool WalkUpFromTagDecl(TagDecl *C) { + // FIXME: build the ClassSpecifier node. + if (C->isFreeStanding()) { + // Class is a declaration specifier and needs a spanning declaration node. + Builder.foldNode(Builder.getRange(C), + new (allocator()) syntax::SimpleDeclaration); + return true; + } return true; } @@ -227,16 +364,238 @@ public: bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, - NodeRole::CompoundStatement_lbrace); - Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CompoundStatement_rbrace); + Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); + for (auto *Child : S->body()) + Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); + Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); - Builder.foldNode(Builder.getRange(S), + Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement); return true; } + // Some statements are not yet handled by syntax trees. + bool WalkUpFromStmt(Stmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::UnknownStatement); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { + // We override to traverse range initializer as VarDecl. + // RAV traverses it as a statement, we produce invalid node kinds in that + // case. + // FIXME: should do this in RAV instead? + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + } + + bool TraverseStmt(Stmt *S) { + if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { + // We want to consume the semicolon, make sure SimpleDeclaration does not. + for (auto *D : DS->decls()) + Builder.noticeDeclaratorWithoutSemicolon(D); + } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { + // (!) do not recurse into subexpressions. + // we do not have syntax trees for expressions yet, so we only want to see + // the first top-level expression. + return WalkUpFromExpr(E->IgnoreImplicit()); + } + return RecursiveASTVisitor::TraverseStmt(S); + } + + // Some expressions are not yet handled by syntax trees. + bool WalkUpFromExpr(Expr *E) { + assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); + Builder.foldNode(Builder.getExprRange(E), + new (allocator()) syntax::UnknownExpression); + return true; + } + + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { + auto Tokens = Builder.getRange(S); + if (Tokens.front().kind() == tok::coloncolon) { + // Handle nested namespace definitions. Those start at '::' token, e.g. + // namespace a^::b {} + // FIXME: build corresponding nodes for the name of this namespace. + return true; + } + Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); + return true; + } + + // The code below is very regular, it could even be generated with some + // preprocessor magic. We merely assign roles to the corresponding children + // and fold resulting nodes. + bool WalkUpFromDeclStmt(DeclStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DeclarationStatement); + return true; + } + + bool WalkUpFromNullStmt(NullStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::EmptyStatement); + return true; + } + + bool WalkUpFromSwitchStmt(SwitchStmt *S) { + Builder.markChildToken(S->getSwitchLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::SwitchStatement); + return true; + } + + bool WalkUpFromCaseStmt(CaseStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::CaseStatement); + return true; + } + + bool WalkUpFromDefaultStmt(DefaultStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DefaultStatement); + return true; + } + + bool WalkUpFromIfStmt(IfStmt *S) { + Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getThen(), + syntax::NodeRole::IfStatement_thenStatement); + Builder.markChildToken(S->getElseLoc(), + syntax::NodeRole::IfStatement_elseKeyword); + Builder.markStmtChild(S->getElse(), + syntax::NodeRole::IfStatement_elseStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::IfStatement); + return true; + } + + bool WalkUpFromForStmt(ForStmt *S) { + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ForStatement); + return true; + } + + bool WalkUpFromWhileStmt(WhileStmt *S) { + Builder.markChildToken(S->getWhileLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::WhileStatement); + return true; + } + + bool WalkUpFromContinueStmt(ContinueStmt *S) { + Builder.markChildToken(S->getContinueLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ContinueStatement); + return true; + } + + bool WalkUpFromBreakStmt(BreakStmt *S) { + Builder.markChildToken(S->getBreakLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::BreakStatement); + return true; + } + + bool WalkUpFromReturnStmt(ReturnStmt *S) { + Builder.markChildToken(S->getReturnLoc(), + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getRetValue(), + syntax::NodeRole::ReturnStatement_value); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ReturnStatement); + return true; + } + + bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::RangeBasedForStatement); + return true; + } + + bool WalkUpFromEmptyDecl(EmptyDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::EmptyDeclaration); + return true; + } + + bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { + Builder.markExprChild(S->getAssertExpr(), + syntax::NodeRole::StaticAssertDeclaration_condition); + Builder.markExprChild(S->getMessage(), + syntax::NodeRole::StaticAssertDeclaration_message); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::StaticAssertDeclaration); + return true; + } + + bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::LinkageSpecificationDeclaration); + return true; + } + + bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::NamespaceAliasDefinition); + return true; + } + + bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::UsingNamespaceDirective); + return true; + } + + bool WalkUpFromUsingDecl(UsingDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::UsingDeclaration); + return true; + } + + bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::UsingDeclaration); + return true; + } + + bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::UsingDeclaration); + return true; + } + + bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::TypeAliasDeclaration); + return true; + } + private: /// A small helper to save some typing. llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } @@ -248,25 +607,55 @@ private: void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New) { - Pending.foldChildren(Range, New); + Pending.foldChildren(Arena, Range, New); +} + +void syntax::TreeBuilder::noticeDeclaratorRange( + llvm::ArrayRef<syntax::Token> Range) { + if (Pending.extendDelayedFold(Range)) + return; + Pending.foldChildrenDelayed(Range, + new (allocator()) syntax::SimpleDeclaration); +} + +void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { + DeclsWithoutSemicolons.insert(D); } -void syntax::TreeBuilder::markChildToken(SourceLocation Loc, - tok::TokenKind Kind, NodeRole Role) { +void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { if (Loc.isInvalid()) return; Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { + if (!Child) + return; + + auto Range = getStmtRange(Child); + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + if (auto *E = dyn_cast<Expr>(Child)) { + Pending.assignRole(getExprRange(E), + NodeRole::ExpressionStatement_expression); + // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. + Pending.foldChildren(Arena, Range, + new (allocator()) syntax::ExpressionStatement); + } + Pending.assignRole(Range, Role); +} + +void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { + if (!Child) + return; + + Pending.assignRole(getExprRange(Child), Role); +} + const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { - auto Tokens = Arena.tokenBuffer().expandedTokens(); - auto &SM = Arena.sourceManager(); - auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) { - return SM.isBeforeInTranslationUnit(T.location(), L); - }); - assert(It != Tokens.end()); - assert(It->location() == L); - return &*It; + auto It = LocationToToken.find(L.getRawEncoding()); + assert(It != LocationToToken.end()); + return It->second; } syntax::TranslationUnit * diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp new file mode 100644 index 000000000000..30b3ee17d092 --- /dev/null +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -0,0 +1,126 @@ +//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/Support/Error.h" + +using namespace clang; + +namespace { +using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>, + bool /*IsOriginal*/)>; +/// Enumerates spans of tokens from the tree consecutively laid out in memory. +void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { + struct Enumerator { + Enumerator(ProcessTokensFn Callback) + : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Callback(Callback) {} + + void run(const syntax::Tree *Root) { + process(Root); + // Report the last span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + } + + private: + void process(const syntax::Node *N) { + if (auto *T = dyn_cast<syntax::Tree>(N)) { + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) + process(C); + return; + } + + auto *L = cast<syntax::Leaf>(N); + if (SpanEnd == L->token() && SpanIsOriginal == L->isOriginal()) { + // Extend the current span. + ++SpanEnd; + return; + } + // Report the current span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + // Start recording a new span. + SpanBegin = L->token(); + SpanEnd = SpanBegin + 1; + SpanIsOriginal = L->isOriginal(); + } + + const syntax::Token *SpanBegin; + const syntax::Token *SpanEnd; + bool SpanIsOriginal; + ProcessTokensFn Callback; + }; + + return Enumerator(Callback).run(Root); +} + +syntax::FileRange rangeOfExpanded(const syntax::Arena &A, + llvm::ArrayRef<syntax::Token> Expanded) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + // Check that \p Expanded actually points into expanded tokens. + assert(Buffer.expandedTokens().begin() <= Expanded.begin()); + assert(Expanded.end() < Buffer.expandedTokens().end()); + + if (Expanded.empty()) + // (!) empty tokens must always point before end(). + return syntax::FileRange( + SM, SM.getExpansionLoc(Expanded.begin()->location()), /*Length=*/0); + + auto Spelled = Buffer.spelledForExpanded(Expanded); + assert(Spelled && "could not find spelled tokens for expanded"); + return syntax::Token::range(SM, Spelled->front(), Spelled->back()); +} +} // namespace + +tooling::Replacements +syntax::computeReplacements(const syntax::Arena &A, + const syntax::TranslationUnit &TU) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + tooling::Replacements Replacements; + // Text inserted by the replacement we are building now. + std::string Replacement; + auto emitReplacement = [&](llvm::ArrayRef<syntax::Token> ReplacedRange) { + if (ReplacedRange.empty() && Replacement.empty()) + return; + llvm::cantFail(Replacements.add(tooling::Replacement( + SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + Replacement = ""; + }; + + const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); + enumerateTokenSpans( + &TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) { + if (!IsOriginal) { + Replacement += + syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); + return; + } + assert(NextOriginal <= Tokens.begin()); + // We are looking at a span of original tokens. + if (NextOriginal != Tokens.begin()) { + // There is a gap, record a replacement or deletion. + emitReplacement(llvm::makeArrayRef(NextOriginal, Tokens.begin())); + } else { + // No gap, but we may have pending insertions. Emit them now. + emitReplacement(llvm::makeArrayRef(NextOriginal, /*Length=*/0)); + } + NextOriginal = Tokens.end(); + }); + + // We might have pending replacements at the end of file. If so, emit them. + emitReplacement(llvm::makeArrayRef( + NextOriginal, Buffer.expandedTokens().drop_back().end())); + + return Replacements; +} diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp new file mode 100644 index 000000000000..72458528202e --- /dev/null +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -0,0 +1,98 @@ +//===- Mutations.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Token.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include <cassert> +#include <string> + +using namespace clang; + +// This class has access to the internals of tree nodes. Its sole purpose is to +// define helpers that allow implementing the high-level mutation operations. +class syntax::MutationsImpl { +public: + /// Add a new node with a specified role. + static void addAfter(syntax::Node *Anchor, syntax::Node *New, NodeRole Role) { + assert(Anchor != nullptr); + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(!New->isDetached()); + assert(Role != NodeRole::Detached); + + New->Role = static_cast<unsigned>(Role); + auto *P = Anchor->parent(); + P->replaceChildRangeLowLevel(Anchor, Anchor, New); + + P->assertInvariants(); + } + + /// Replace the node, keeping the role. + static void replace(syntax::Node *Old, syntax::Node *New) { + assert(Old != nullptr); + assert(Old->Parent != nullptr); + assert(Old->canModify()); + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(New->isDetached()); + + New->Role = Old->Role; + auto *P = Old->parent(); + P->replaceChildRangeLowLevel(findPrevious(Old), Old->nextSibling(), New); + + P->assertInvariants(); + } + + /// Completely remove the node from its parent. + static void remove(syntax::Node *N) { + auto *P = N->parent(); + P->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(), + /*New=*/nullptr); + + P->assertInvariants(); + N->assertInvariants(); + } + +private: + static syntax::Node *findPrevious(syntax::Node *N) { + if (N->parent()->firstChild() == N) + return nullptr; + for (syntax::Node *C = N->parent()->firstChild(); C != nullptr; + C = C->nextSibling()) { + if (C->nextSibling() == N) + return C; + } + llvm_unreachable("could not find a child node"); + } +}; + +void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { + assert(S); + assert(S->canModify()); + + if (isa<CompoundStatement>(S->parent())) { + // A child of CompoundStatement can just be safely removed. + MutationsImpl::remove(S); + return; + } + // For the rest, we have to replace with an empty statement. + if (isa<EmptyStatement>(S)) + return; // already an empty statement, nothing to do. + + MutationsImpl::replace(S, createEmptyStatement(A)); +} diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 061ed73bbebd..5b0c5107c134 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -16,20 +16,233 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "Leaf"; case NodeKind::TranslationUnit: return OS << "TranslationUnit"; - case NodeKind::TopLevelDeclaration: - return OS << "TopLevelDeclaration"; + case NodeKind::UnknownExpression: + return OS << "UnknownExpression"; + case NodeKind::UnknownStatement: + return OS << "UnknownStatement"; + case NodeKind::DeclarationStatement: + return OS << "DeclarationStatement"; + case NodeKind::EmptyStatement: + return OS << "EmptyStatement"; + case NodeKind::SwitchStatement: + return OS << "SwitchStatement"; + case NodeKind::CaseStatement: + return OS << "CaseStatement"; + case NodeKind::DefaultStatement: + return OS << "DefaultStatement"; + case NodeKind::IfStatement: + return OS << "IfStatement"; + case NodeKind::ForStatement: + return OS << "ForStatement"; + case NodeKind::WhileStatement: + return OS << "WhileStatement"; + case NodeKind::ContinueStatement: + return OS << "ContinueStatement"; + case NodeKind::BreakStatement: + return OS << "BreakStatement"; + case NodeKind::ReturnStatement: + return OS << "ReturnStatement"; + case NodeKind::RangeBasedForStatement: + return OS << "RangeBasedForStatement"; + case NodeKind::ExpressionStatement: + return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; + case NodeKind::UnknownDeclaration: + return OS << "UnknownDeclaration"; + case NodeKind::EmptyDeclaration: + return OS << "EmptyDeclaration"; + case NodeKind::StaticAssertDeclaration: + return OS << "StaticAssertDeclaration"; + case NodeKind::LinkageSpecificationDeclaration: + return OS << "LinkageSpecificationDeclaration"; + case NodeKind::SimpleDeclaration: + return OS << "SimpleDeclaration"; + case NodeKind::NamespaceDefinition: + return OS << "NamespaceDefinition"; + case NodeKind::NamespaceAliasDefinition: + return OS << "NamespaceAliasDefinition"; + case NodeKind::UsingNamespaceDirective: + return OS << "UsingNamespaceDirective"; + case NodeKind::UsingDeclaration: + return OS << "UsingDeclaration"; + case NodeKind::TypeAliasDeclaration: + return OS << "TypeAliasDeclaration"; } llvm_unreachable("unknown node kind"); } +llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { + switch (R) { + case syntax::NodeRole::Detached: + return OS << "Detached"; + case syntax::NodeRole::Unknown: + return OS << "Unknown"; + case syntax::NodeRole::OpenParen: + return OS << "OpenParen"; + case syntax::NodeRole::CloseParen: + return OS << "CloseParen"; + case syntax::NodeRole::IntroducerKeyword: + return OS << "IntroducerKeyword"; + case syntax::NodeRole::BodyStatement: + return OS << "BodyStatement"; + case syntax::NodeRole::CaseStatement_value: + return OS << "CaseStatement_value"; + case syntax::NodeRole::IfStatement_thenStatement: + return OS << "IfStatement_thenStatement"; + case syntax::NodeRole::IfStatement_elseKeyword: + return OS << "IfStatement_elseKeyword"; + case syntax::NodeRole::IfStatement_elseStatement: + return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::ReturnStatement_value: + return OS << "ReturnStatement_value"; + case syntax::NodeRole::ExpressionStatement_expression: + return OS << "ExpressionStatement_expression"; + case syntax::NodeRole::CompoundStatement_statement: + return OS << "CompoundStatement_statement"; + case syntax::NodeRole::StaticAssertDeclaration_condition: + return OS << "StaticAssertDeclaration_condition"; + case syntax::NodeRole::StaticAssertDeclaration_message: + return OS << "StaticAssertDeclaration_message"; + } + llvm_unreachable("invalid role"); +} + +syntax::Leaf *syntax::SwitchStatement::switchKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::SwitchStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::CaseStatement::caseKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::CaseStatement::value() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::CaseStatement_value)); +} + +syntax::Statement *syntax::CaseStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::DefaultStatement::defaultKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::DefaultStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::IfStatement::ifKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::IfStatement::thenStatement() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::IfStatement_thenStatement)); +} + +syntax::Leaf *syntax::IfStatement::elseKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IfStatement_elseKeyword)); +} + +syntax::Statement *syntax::IfStatement::elseStatement() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::IfStatement_elseStatement)); +} + +syntax::Leaf *syntax::ForStatement::forKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::ForStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::WhileStatement::whileKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::WhileStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::ContinueStatement::continueKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::BreakStatement::breakKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ReturnStatement::returnKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::ReturnStatement::value() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ReturnStatement_value)); +} + +syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::RangeBasedForStatement::body() { + return llvm::cast_or_null<syntax::Statement>( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Expression *syntax::ExpressionStatement::expression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ExpressionStatement_expression)); +} + syntax::Leaf *syntax::CompoundStatement::lbrace() { return llvm::cast_or_null<syntax::Leaf>( - findChild(NodeRole::CompoundStatement_lbrace)); + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector<syntax::Statement *> syntax::CompoundStatement::statements() { + std::vector<syntax::Statement *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::CompoundStatement_statement) + Children.push_back(llvm::cast<syntax::Statement>(C)); + } + return Children; } syntax::Leaf *syntax::CompoundStatement::rbrace() { return llvm::cast_or_null<syntax::Leaf>( - findChild(NodeRole::CompoundStatement_rbrace)); + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Expression *syntax::StaticAssertDeclaration::condition() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::StaticAssertDeclaration_condition)); +} + +syntax::Expression *syntax::StaticAssertDeclaration::message() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::StaticAssertDeclaration_message)); } diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp new file mode 100644 index 000000000000..aa01a34c761f --- /dev/null +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -0,0 +1,45 @@ +//===- Synthesis.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/BuildTree.h" + +using namespace clang; + +/// Exposes private syntax tree APIs required to implement node synthesis. +/// Should not be used for anything else. +class syntax::FactoryImpl { +public: + static void setCanModify(syntax::Node *N) { N->CanModify = true; } + + static void prependChildLowLevel(syntax::Tree *T, syntax::Node *Child, + syntax::NodeRole R) { + T->prependChildLowLevel(Child, R); + } +}; + +clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, + clang::tok::TokenKind K) { + auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer( + clang::tok::getPunctuatorSpelling(K))) + .second; + assert(Tokens.size() == 1); + assert(Tokens.front().kind() == K); + auto *L = new (A.allocator()) clang::syntax::Leaf(Tokens.begin()); + FactoryImpl::setCanModify(L); + L->assertInvariants(); + return L; +} + +clang::syntax::EmptyStatement * +syntax::createEmptyStatement(clang::syntax::Arena &A) { + auto *S = new (A.allocator()) clang::syntax::EmptyStatement; + FactoryImpl::setCanModify(S); + FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi), + NodeRole::Unknown); + S->assertInvariants(); + return S; +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index a2c3bc137d6b..3df1c064923a 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -67,7 +67,7 @@ FileRange syntax::Token::range(const SourceManager &SM, auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens"); + assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -119,6 +119,28 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { return Text.substr(Begin, length()); } +llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { + if (R.isInvalid()) + return {}; + const Token *Begin = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const Token *End = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + +CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { + return CharSourceRange( + SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), + /*IsTokenRange=*/false); +} + std::pair<const syntax::Token *, const TokenBuffer::Mapping *> TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); @@ -232,6 +254,30 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { return E; } +llvm::ArrayRef<syntax::Token> +syntax::spelledTokensTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + assert(Loc.isFileID()); + llvm::ArrayRef<syntax::Token> All = + Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)); + auto *Right = llvm::partition_point( + All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + bool AcceptRight = Right != All.end() && Right->location() <= Loc; + bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc; + return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), + Right + (AcceptRight ? 1 : 0)); +} + +const syntax::Token * +syntax::spelledIdentifierTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { + if (Tok.kind() == tok::identifier) + return &Tok; + } + return nullptr; +} + std::vector<const syntax::Token *> TokenBuffer::macroExpansions(FileID FID) const { auto FileIt = Files.find(FID); diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 1549b6724fa4..9a6270ec4cce 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -11,9 +11,27 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" +#include <cassert> using namespace clang; +namespace { +static void traverse(const syntax::Node *N, + llvm::function_ref<void(const syntax::Node *)> Visit) { + if (auto *T = dyn_cast<syntax::Tree>(N)) { + for (auto *C = T->firstChild(); C; C = C->nextSibling()) + traverse(C, Visit); + } + Visit(N); +} +static void traverse(syntax::Node *N, + llvm::function_ref<void(syntax::Node *)> Visit) { + traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) { + Visit(const_cast<syntax::Node *>(N)); + }); +} +} // namespace + syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, TokenBuffer Tokens) : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(std::move(Tokens)) {} @@ -40,7 +58,10 @@ bool syntax::Leaf::classof(const Node *N) { syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)), - Role(static_cast<unsigned>(NodeRole::Detached)) {} + Role(static_cast<unsigned>(NodeRole::Detached)), Original(false), + CanModify(false) {} + +bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } @@ -56,15 +77,53 @@ void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { this->FirstChild = Child; } -namespace { -static void traverse(const syntax::Node *N, - llvm::function_ref<void(const syntax::Node *)> Visit) { - if (auto *T = dyn_cast<syntax::Tree>(N)) { - for (auto *C = T->firstChild(); C; C = C->nextSibling()) - traverse(C, Visit); +void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, + Node *New) { + assert(!BeforeBegin || BeforeBegin->Parent == this); + +#ifndef NDEBUG + for (auto *N = New; N; N = N->nextSibling()) { + assert(N->Parent == nullptr); + assert(N->role() != NodeRole::Detached && "Roles must be set"); + // FIXME: sanity-check the role. } - Visit(N); +#endif + + // Detach old nodes. + for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling(); + N != End;) { + auto *Next = N->NextSibling; + + N->Role = static_cast<unsigned>(NodeRole::Detached); + N->Parent = nullptr; + N->NextSibling = nullptr; + if (N->Original) + traverse(N, [&](Node *C) { C->Original = false; }); + + N = Next; + } + + // Attach new nodes. + if (BeforeBegin) + BeforeBegin->NextSibling = New ? New : End; + else + FirstChild = New ? New : End; + + if (New) { + auto *Last = New; + for (auto *N = New; N != nullptr; N = N->nextSibling()) { + Last = N; + N->Parent = this; + } + Last->NextSibling = End; + } + + // Mark the node as modified. + for (auto *T = this; T && T->Original; T = T->Parent) + T->Original = false; } + +namespace { static void dumpTokens(llvm::raw_ostream &OS, ArrayRef<syntax::Token> Tokens, const SourceManager &SM) { assert(!Tokens.empty()); @@ -85,13 +144,16 @@ static void dumpTokens(llvm::raw_ostream &OS, ArrayRef<syntax::Token> Tokens, static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector<bool> IndentMask) { - if (N->role() != syntax::NodeRole::Unknown) { - // FIXME: print the symbolic name of a role. - if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - else - OS << static_cast<int>(N->role()) << ": "; - } + std::string Marks; + if (!N->isOriginal()) + Marks += "M"; + if (N->role() == syntax::NodeRole::Detached) + Marks += "*"; // FIXME: find a nice way to print other roles. + if (!N->canModify()) + Marks += "I"; + if (!Marks.empty()) + OS << Marks << ": "; + if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); OS << "\n"; @@ -136,10 +198,60 @@ std::string syntax::Node::dumpTokens(const Arena &A) const { if (!L) return; ::dumpTokens(OS, *L->token(), A.sourceManager()); + OS << " "; }); return OS.str(); } +void syntax::Node::assertInvariants() const { +#ifndef NDEBUG + if (isDetached()) + assert(parent() == nullptr); + else + assert(parent() != nullptr); + + auto *T = dyn_cast<Tree>(this); + if (!T) + return; + for (auto *C = T->firstChild(); C; C = C->nextSibling()) { + if (T->isOriginal()) + assert(C->isOriginal()); + assert(!C->isDetached()); + assert(C->parent() == T); + } +#endif +} + +void syntax::Node::assertInvariantsRecursive() const { +#ifndef NDEBUG + traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); }); +#endif +} + +syntax::Leaf *syntax::Tree::firstLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + if (auto *L = dyn_cast<syntax::Leaf>(C)) + return L; + T = cast<syntax::Tree>(C); + } + return nullptr; +} + +syntax::Leaf *syntax::Tree::lastLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + // Find the last child. + while (auto *Next = C->nextSibling()) + C = Next; + + if (auto *L = dyn_cast<syntax::Leaf>(C)) + return L; + T = cast<syntax::Tree>(C); + } + return nullptr; +} + syntax::Node *syntax::Tree::findChild(NodeRole R) { for (auto *C = FirstChild; C; C = C->nextSibling()) { if (C->role() == R) |