Diffstat (limited to 'clang/lib/Tooling/Syntax')
-rw-r--r--  clang/lib/Tooling/Syntax/BuildTree.cpp | 1108
-rw-r--r--  clang/lib/Tooling/Syntax/Mutations.cpp |    2
-rw-r--r--  clang/lib/Tooling/Syntax/Nodes.cpp     |  281
-rw-r--r--  clang/lib/Tooling/Syntax/Tokens.cpp    |  609
-rw-r--r--  clang/lib/Tooling/Syntax/Tree.cpp      |   23
5 files changed, 1598 insertions(+), 425 deletions(-)
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index aa8844771d373..1f192180ec451 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -6,20 +6,32 @@
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/AST/ASTFwd.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
+#include "clang/AST/TypeLoc.h"
+#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
#include "clang/Tooling/Syntax/Nodes.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "clang/Tooling/Syntax/Tree.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -27,6 +39,7 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
#include <map>
using namespace clang;
@@ -34,6 +47,207 @@ using namespace clang;
LLVM_ATTRIBUTE_UNUSED
static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
+namespace {
+/// Get start location of the Declarator from the TypeLoc.
+/// E.g.:
+/// loc of `(` in `int (a)`
+/// loc of `*` in `int *(a)`
+/// loc of the first `(` in `int (*a)(int)`
+/// loc of the `*` in `int *(a)(int)`
+/// loc of the first `*` in `const int *const *volatile a;`
+///
+/// It is non-trivial to get the start location because TypeLocs are stored
+/// inside out. In the example above `*volatile` is the TypeLoc returned
+/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`
+/// returns.
+struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
+ SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {
+ auto L = Visit(T.getInnerLoc());
+ if (L.isValid())
+ return L;
+ return T.getLParenLoc();
+ }
+
+ // Types spelled in the prefix part of the declarator.
+ SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {
+ return HandlePointer(T);
+ }
+
+ SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {
+ return HandlePointer(T);
+ }
+
+ SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {
+ return HandlePointer(T);
+ }
+
+ SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {
+ return HandlePointer(T);
+ }
+
+ SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {
+ return HandlePointer(T);
+ }
+
+ // All other cases are not important, as they are either part of declaration
+ // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on
+ // existing declarators (e.g. QualifiedTypeLoc). They cannot start the
+ // declarator themselves, but their underlying type can.
+ SourceLocation VisitTypeLoc(TypeLoc T) {
+ auto N = T.getNextTypeLoc();
+ if (!N)
+ return SourceLocation();
+ return Visit(N);
+ }
+
+ SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {
+ if (T.getTypePtr()->hasTrailingReturn())
+ return SourceLocation(); // avoid recursing into the suffix of declarator.
+ return VisitTypeLoc(T);
+ }
+
+private:
+ template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {
+ auto L = Visit(T.getPointeeLoc());
+ if (L.isValid())
+ return L;
+ return T.getLocalSourceRange().getBegin();
+ }
+};
+} // namespace
+
+static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) {
+ switch (E.getOperator()) {
+ // Comparison
+ case OO_EqualEqual:
+ case OO_ExclaimEqual:
+ case OO_Greater:
+ case OO_GreaterEqual:
+ case OO_Less:
+ case OO_LessEqual:
+ case OO_Spaceship:
+ // Assignment
+ case OO_Equal:
+ case OO_SlashEqual:
+ case OO_PercentEqual:
+ case OO_CaretEqual:
+ case OO_PipeEqual:
+ case OO_LessLessEqual:
+ case OO_GreaterGreaterEqual:
+ case OO_PlusEqual:
+ case OO_MinusEqual:
+ case OO_StarEqual:
+ case OO_AmpEqual:
+ // Binary computation
+ case OO_Slash:
+ case OO_Percent:
+ case OO_Caret:
+ case OO_Pipe:
+ case OO_LessLess:
+ case OO_GreaterGreater:
+ case OO_AmpAmp:
+ case OO_PipePipe:
+ case OO_ArrowStar:
+ case OO_Comma:
+ return syntax::NodeKind::BinaryOperatorExpression;
+ case OO_Tilde:
+ case OO_Exclaim:
+ return syntax::NodeKind::PrefixUnaryOperatorExpression;
+ // Prefix/Postfix increment/decrement
+ case OO_PlusPlus:
+ case OO_MinusMinus:
+ switch (E.getNumArgs()) {
+ case 1:
+ return syntax::NodeKind::PrefixUnaryOperatorExpression;
+ case 2:
+ return syntax::NodeKind::PostfixUnaryOperatorExpression;
+ default:
+ llvm_unreachable("Invalid number of arguments for operator");
+ }
+ // Operators that can be unary or binary
+ case OO_Plus:
+ case OO_Minus:
+ case OO_Star:
+ case OO_Amp:
+ switch (E.getNumArgs()) {
+ case 1:
+ return syntax::NodeKind::PrefixUnaryOperatorExpression;
+ case 2:
+ return syntax::NodeKind::BinaryOperatorExpression;
+ default:
+ llvm_unreachable("Invalid number of arguments for operator");
+ }
+ return syntax::NodeKind::BinaryOperatorExpression;
+ // Not yet supported by SyntaxTree
+ case OO_New:
+ case OO_Delete:
+ case OO_Array_New:
+ case OO_Array_Delete:
+ case OO_Coawait:
+ case OO_Call:
+ case OO_Subscript:
+ case OO_Arrow:
+ return syntax::NodeKind::UnknownExpression;
+ case OO_Conditional: // not overloadable
+ case NUM_OVERLOADED_OPERATORS:
+ case OO_None:
+ llvm_unreachable("Not an overloadable operator");
+ }
+ llvm_unreachable("Unknown OverloadedOperatorKind enum");
+}
+
+/// Gets the range of declarator as defined by the C++ grammar. E.g.
+/// `int a;` -> range of `a`,
+/// `int *a;` -> range of `*a`,
+/// `int a[10];` -> range of `a[10]`,
+/// `int a[1][2][3];` -> range of `a[1][2][3]`,
+/// `int *a = nullptr` -> range of `*a = nullptr`.
+/// FIXME: \p Name must be a source range, e.g. for `operator+`.
+static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
+ SourceLocation Name,
+ SourceRange Initializer) {
+ SourceLocation Start = GetStartLoc().Visit(T);
+ SourceLocation End = T.getSourceRange().getEnd();
+ assert(End.isValid());
+ if (Name.isValid()) {
+ if (Start.isInvalid())
+ Start = Name;
+ if (SM.isBeforeInTranslationUnit(End, Name))
+ End = Name;
+ }
+ if (Initializer.isValid()) {
+ auto InitializerEnd = Initializer.getEnd();
+ assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) ||
+ End == InitializerEnd);
+ End = InitializerEnd;
+ }
+ return SourceRange(Start, End);
+}
+
+namespace {
+/// All AST hierarchy roots that can be represented as pointers.
+using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>;
+/// Maintains a mapping from AST to syntax tree nodes. This class will get more
+/// complicated as we support more kinds of AST nodes, e.g. TypeLocs.
+/// FIXME: expose this as public API.
+class ASTToSyntaxMapping {
+public:
+ void add(ASTPtr From, syntax::Tree *To) {
+ assert(To != nullptr);
+ assert(!From.isNull());
+
+ bool Added = Nodes.insert({From, To}).second;
+ (void)Added;
+ assert(Added && "mapping added twice");
+ }
+
+ syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }
+
+private:
+ llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes;
+};
+} // namespace
+
/// A helper class for constructing the syntax tree while traversing a clang
/// AST.
///
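To see how the helpers introduced above fit together, here is a minimal illustrative sketch (not part of the patch) that combines them for a DeclaratorDecl. It assumes the declarations from BuildTree.cpp are in scope and uses D->getLocation() in place of the patch's getQualifiedNameStart(D):

  // Illustrative sketch only, not taken from this diff.
  static SourceRange declaratorRangeFor(const SourceManager &SM,
                                        DeclaratorDecl *D) {
    // The TypeLoc chain is stored inside out; GetStartLoc (used internally by
    // getDeclaratorRange) recurses to the innermost prefix chunk, e.g. the
    // first `*` in `const int *const *volatile a;`.
    TypeLoc TL = D->getTypeSourceInfo()->getTypeLoc();
    // getDeclaratorRange then widens the range over the declarator name and,
    // when present, the initializer: `*a = nullptr` for `int *a = nullptr;`.
    return getDeclaratorRange(SM, TL, /*Name=*/D->getLocation(),
                              /*Initializer=*/SourceRange());
  }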
@@ -57,30 +271,44 @@ public:
}
llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
+ const SourceManager &sourceManager() const { return Arena.sourceManager(); }
/// Populate children for \p New node, assuming it covers tokens from \p
/// Range.
- void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
-
- /// Must be called with the range of each `DeclaratorDecl`. Ensures the
- /// corresponding declarator nodes are covered by `SimpleDeclaration`.
- void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range);
+ void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
+ ASTPtr From) {
+ assert(New);
+ Pending.foldChildren(Arena, Range, New);
+ if (From)
+ Mapping.add(From, New);
+ }
+ void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
+ TypeLoc L) {
+ // FIXME: add mapping for TypeLocs
+ foldNode(Range, New, nullptr);
+ }
/// Notifies that we should not consume trailing semicolon when computing
/// token range of \p D.
- void noticeDeclaratorWithoutSemicolon(Decl *D);
+ void noticeDeclWithoutSemicolon(Decl *D);
/// Mark the \p Child node with a corresponding \p Role. All marked children
/// should be consumed by foldNode.
- /// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
- /// wraps expressions into expression statement.
+ /// When called on expressions (clang::Expr is derived from clang::Stmt),
+ /// wraps expressions into expression statement.
void markStmtChild(Stmt *Child, NodeRole Role);
/// Should be called for expressions in non-statement position to avoid
/// wrapping into expression statement.
void markExprChild(Expr *Child, NodeRole Role);
-
/// Set role for a token starting at \p Loc.
void markChildToken(SourceLocation Loc, NodeRole R);
+ /// Set role for \p T.
+ void markChildToken(const syntax::Token *T, NodeRole R);
+
+ /// Set role for \p N.
+ void markChild(syntax::Node *N, NodeRole R);
+ /// Set role for the syntax node matching \p N.
+ void markChild(ASTPtr N, NodeRole R);
/// Finish building the tree and consume the root node.
syntax::TranslationUnit *finalize() && {
@@ -97,8 +325,16 @@ public:
return TU;
}
- /// getRange() finds the syntax tokens corresponding to the passed source
- /// locations.
+ /// Finds a token starting at \p L. The token must exist if \p L is valid.
+ const syntax::Token *findToken(SourceLocation L) const;
+
+ /// Finds the syntax tokens corresponding to the \p SourceRange.
+ llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const {
+ assert(Range.isValid());
+ return getRange(Range.getBegin(), Range.getEnd());
+ }
+
+ /// Finds the syntax tokens corresponding to the passed source locations.
/// \p First is the start position of the first token and \p Last is the start
/// position of the last token.
llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
@@ -109,23 +345,62 @@ public:
Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
}
- llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
- auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc());
- if (llvm::isa<NamespaceDecl>(D))
- return Tokens;
- if (DeclsWithoutSemicolons.count(D))
- return Tokens;
- // FIXME: do not consume trailing semicolon on function definitions.
- // Most declarations own a semicolon in syntax trees, but not in clang AST.
- return withTrailingSemicolon(Tokens);
+
+ llvm::ArrayRef<syntax::Token>
+ getTemplateRange(const ClassTemplateSpecializationDecl *D) const {
+ auto Tokens = getRange(D->getSourceRange());
+ return maybeAppendSemicolon(Tokens, D);
}
+
+ /// Returns true if \p D is the last declarator in a chain and is thus
+  /// responsible for creating a SimpleDeclaration for the whole chain.
+ template <class T>
+ bool isResponsibleForCreatingDeclaration(const T *D) const {
+ static_assert((std::is_base_of<DeclaratorDecl, T>::value ||
+ std::is_base_of<TypedefNameDecl, T>::value),
+ "only DeclaratorDecl and TypedefNameDecl are supported.");
+
+ const Decl *Next = D->getNextDeclInContext();
+
+ // There's no next sibling, this one is responsible.
+ if (Next == nullptr) {
+ return true;
+ }
+ const auto *NextT = llvm::dyn_cast<T>(Next);
+
+ // Next sibling is not the same type, this one is responsible.
+ if (NextT == nullptr) {
+ return true;
+ }
+  // Next sibling doesn't begin at the same loc; it must be a different
+ // declaration, so this declarator is responsible.
+ if (NextT->getBeginLoc() != D->getBeginLoc()) {
+ return true;
+ }
+
+ // NextT is a member of the same declaration, and we need the last member to
+  // create the declaration. This one is not responsible.
+ return false;
+ }
+
+ llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) {
+ llvm::ArrayRef<clang::syntax::Token> Tokens;
+ // We want to drop the template parameters for specializations.
+ if (const auto *S = llvm::dyn_cast<TagDecl>(D))
+ Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc());
+ else
+ Tokens = getRange(D->getSourceRange());
+ return maybeAppendSemicolon(Tokens, D);
+ }
+
llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
- return getRange(E->getBeginLoc(), E->getEndLoc());
+ return getRange(E->getSourceRange());
}
+
/// Find the adjusted range for the statement, consuming the trailing
/// semicolon when needed.
llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
- auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
+ auto Tokens = getRange(S->getSourceRange());
if (isa<CompoundStmt>(S))
return Tokens;
@@ -138,17 +413,31 @@ public:
private:
llvm::ArrayRef<syntax::Token>
+ maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens,
+ const Decl *D) const {
+ if (llvm::isa<NamespaceDecl>(D))
+ return Tokens;
+ if (DeclsWithoutSemicolons.count(D))
+ return Tokens;
+ // FIXME: do not consume trailing semicolon on function definitions.
+ // Most declarations own a semicolon in syntax trees, but not in clang AST.
+ return withTrailingSemicolon(Tokens);
+ }
+
+ llvm::ArrayRef<syntax::Token>
withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
assert(!Tokens.empty());
assert(Tokens.back().kind() != tok::eof);
- // (!) we never consume 'eof', so looking at the next token is ok.
+ // We never consume 'eof', so looking at the next token is ok.
if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
return Tokens;
}
- /// Finds a token starting at \p L. The token must exist.
- const syntax::Token *findToken(SourceLocation L) const;
+ void setRole(syntax::Node *N, NodeRole R) {
+ assert(N->role() == NodeRole::Detached);
+ N->setRole(R);
+ }
/// A collection of trees covering the input tokens.
/// When created, each tree corresponds to a single token in the file.
@@ -166,12 +455,10 @@ private:
auto *L = new (A.allocator()) syntax::Leaf(&T);
L->Original = true;
L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
- Trees.insert(Trees.end(), {&T, NodeAndRole{L}});
+ Trees.insert(Trees.end(), {&T, L});
}
}
- ~Forest() { assert(DelayedFolds.empty()); }
-
void assignRole(llvm::ArrayRef<syntax::Token> Range,
syntax::NodeRole Role) {
assert(!Range.empty());
@@ -181,56 +468,49 @@ private:
assert((std::next(It) == Trees.end() ||
std::next(It)->first == Range.end()) &&
"no child with the specified range");
- It->second.Role = Role;
+ assert(It->second->role() == NodeRole::Detached &&
+ "re-assigning role for a child");
+ It->second->setRole(Role);
}
/// Add \p Node to the forest and attach child nodes based on \p Tokens.
void foldChildren(const syntax::Arena &A,
llvm::ArrayRef<syntax::Token> Tokens,
syntax::Tree *Node) {
- // Execute delayed folds inside `Tokens`.
- auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
- auto It = BeginExecuted;
- for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
- foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
- It->second.Node);
- DelayedFolds.erase(BeginExecuted, It);
-
// Attach children to `Node`.
- foldChildrenEager(A, Tokens, Node);
- }
+ assert(Node->firstChild() == nullptr && "node already has children");
- /// Schedule a call to `foldChildren` that will only be executed when
- /// containing node is folded. The range of delayed nodes can be extended by
- /// calling `extendDelayedFold`. Only one delayed node for each starting
- /// token is allowed.
- void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens,
- syntax::Tree *Node) {
- assert(!Tokens.empty());
- bool Inserted =
- DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
- .second;
- (void)Inserted;
- assert(Inserted && "Multiple delayed folds start at the same token");
- }
+ auto *FirstToken = Tokens.begin();
+ auto BeginChildren = Trees.lower_bound(FirstToken);
- /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends
- /// its endpoint to `ExtendedRange.end()` and returns true.
- /// Otherwise, returns false.
- bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) {
- assert(!ExtendedRange.empty());
- auto It = DelayedFolds.find(ExtendedRange.data());
- if (It == DelayedFolds.end())
- return false;
- assert(It->second.End <= ExtendedRange.end());
- It->second.End = ExtendedRange.end();
- return true;
+ assert((BeginChildren == Trees.end() ||
+ BeginChildren->first == FirstToken) &&
+ "fold crosses boundaries of existing subtrees");
+ auto EndChildren = Trees.lower_bound(Tokens.end());
+ assert(
+ (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
+ "fold crosses boundaries of existing subtrees");
+
+ // We need to go in reverse order, because we can only prepend.
+ for (auto It = EndChildren; It != BeginChildren; --It) {
+ auto *C = std::prev(It)->second;
+ if (C->role() == NodeRole::Detached)
+ C->setRole(NodeRole::Unknown);
+ Node->prependChildLowLevel(C);
+ }
+
+ // Mark that this node came from the AST and is backed by the source code.
+ Node->Original = true;
+ Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
+
+ Trees.erase(BeginChildren, EndChildren);
+ Trees.insert({FirstToken, Node});
}
// EXPECTS: all tokens were consumed and are owned by a single root node.
syntax::Node *finalize() && {
assert(Trees.size() == 1);
- auto *Root = Trees.begin()->second.Node;
+ auto *Root = Trees.begin()->second;
Trees = {};
return Root;
}
@@ -243,66 +523,19 @@ private:
? (std::next(It)->first - It->first)
: A.tokenBuffer().expandedTokens().end() - It->first;
- R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
- It->second.Node->kind(),
- It->first->text(A.sourceManager()), CoveredTokens);
- R += It->second.Node->dump(A);
+ R += std::string(llvm::formatv(
+ "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(),
+ It->first->text(A.sourceManager()), CoveredTokens));
+ R += It->second->dump(A);
}
return R;
}
private:
- /// Implementation detail of `foldChildren`, does acutal folding ignoring
- /// delayed folds.
- void foldChildrenEager(const syntax::Arena &A,
- llvm::ArrayRef<syntax::Token> Tokens,
- syntax::Tree *Node) {
- assert(Node->firstChild() == nullptr && "node already has children");
-
- auto *FirstToken = Tokens.begin();
- auto BeginChildren = Trees.lower_bound(FirstToken);
- assert((BeginChildren == Trees.end() ||
- BeginChildren->first == FirstToken) &&
- "fold crosses boundaries of existing subtrees");
- auto EndChildren = Trees.lower_bound(Tokens.end());
- assert(
- (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
- "fold crosses boundaries of existing subtrees");
-
- // (!) we need to go in reverse order, because we can only prepend.
- for (auto It = EndChildren; It != BeginChildren; --It)
- Node->prependChildLowLevel(std::prev(It)->second.Node,
- std::prev(It)->second.Role);
-
- // Mark that this node came from the AST and is backed by the source code.
- Node->Original = true;
- Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
-
- Trees.erase(BeginChildren, EndChildren);
- Trees.insert({FirstToken, NodeAndRole(Node)});
- }
- /// A with a role that should be assigned to it when adding to a parent.
- struct NodeAndRole {
- explicit NodeAndRole(syntax::Node *Node)
- : Node(Node), Role(NodeRole::Unknown) {}
-
- syntax::Node *Node;
- NodeRole Role;
- };
-
/// Maps from the start token to a subtree starting at that token.
/// Keys in the map are pointers into the array of expanded tokens, so
/// pointer order corresponds to the order of preprocessor tokens.
- /// FIXME: storing the end tokens is redundant.
- /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
- std::map<const syntax::Token *, NodeAndRole> Trees;
-
- /// See documentation of `foldChildrenDelayed` for details.
- struct DelayedFold {
- const syntax::Token *End = nullptr;
- syntax::Tree *Node = nullptr;
- };
- std::map<const syntax::Token *, DelayedFold> DelayedFolds;
+ std::map<const syntax::Token *, syntax::Node *> Trees;
};
/// For debugging purposes.
@@ -314,49 +547,91 @@ private:
LocationToToken;
Forest Pending;
llvm::DenseSet<Decl *> DeclsWithoutSemicolons;
+ ASTToSyntaxMapping Mapping;
};
namespace {
class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
public:
- explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
- : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
+ explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder)
+ : Builder(Builder), Context(Context) {}
bool shouldTraversePostOrder() const { return true; }
- bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) {
- // Ensure declarators are covered by SimpleDeclaration.
- Builder.noticeDeclaratorRange(Builder.getRange(D));
- // FIXME: build nodes for the declarator too.
- return true;
+ bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {
+ return processDeclaratorAndDeclaration(DD);
}
- bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) {
- // Also a declarator.
- Builder.noticeDeclaratorRange(Builder.getRange(D));
- // FIXME: build nodes for the declarator too.
- return true;
+
+ bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) {
+ return processDeclaratorAndDeclaration(TD);
}
bool VisitDecl(Decl *D) {
assert(!D->isImplicit());
- Builder.foldNode(Builder.getRange(D),
- new (allocator()) syntax::UnknownDeclaration());
+ Builder.foldNode(Builder.getDeclarationRange(D),
+ new (allocator()) syntax::UnknownDeclaration(), D);
+ return true;
+ }
+
+ // RAV does not call WalkUpFrom* on explicit instantiations, so we have to
+ // override Traverse.
+ // FIXME: make RAV call WalkUpFrom* instead.
+ bool
+ TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) {
+ if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C))
+ return false;
+ if (C->isExplicitSpecialization())
+ return true; // we are only interested in explicit instantiations.
+ auto *Declaration =
+ cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));
+ foldExplicitTemplateInstantiation(
+ Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()),
+ Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);
+ return true;
+ }
+
+ bool WalkUpFromTemplateDecl(TemplateDecl *S) {
+ foldTemplateDeclaration(
+ Builder.getDeclarationRange(S),
+ Builder.findToken(S->getTemplateParameters()->getTemplateLoc()),
+ Builder.getDeclarationRange(S->getTemplatedDecl()), S);
return true;
}
bool WalkUpFromTagDecl(TagDecl *C) {
// FIXME: build the ClassSpecifier node.
- if (C->isFreeStanding()) {
- // Class is a declaration specifier and needs a spanning declaration node.
- Builder.foldNode(Builder.getRange(C),
- new (allocator()) syntax::SimpleDeclaration);
+ if (!C->isFreeStanding()) {
+ assert(C->getNumTemplateParameterLists() == 0);
return true;
}
+ handleFreeStandingTagDecl(C);
return true;
}
+ syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) {
+ assert(C->isFreeStanding());
+ // Class is a declaration specifier and needs a spanning declaration node.
+ auto DeclarationRange = Builder.getDeclarationRange(C);
+ syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration;
+ Builder.foldNode(DeclarationRange, Result, nullptr);
+
+ // Build TemplateDeclaration nodes if we had template parameters.
+ auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) {
+ const auto *TemplateKW = Builder.findToken(L.getTemplateLoc());
+ auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end());
+ Result =
+ foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr);
+ DeclarationRange = R;
+ };
+ if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C))
+ ConsumeTemplateParameters(*S->getTemplateParameters());
+ for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I)
+ ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1));
+ return Result;
+ }
+
bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
- // (!) we do not want to call VisitDecl(), the declaration for translation
+    // We do not want to call VisitDecl(); the declaration for the translation
// unit is built by finalize().
return true;
}
@@ -370,14 +645,14 @@ public:
Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::CompoundStatement);
+ new (allocator()) syntax::CompoundStatement, S);
return true;
}
// Some statements are not yet handled by syntax trees.
bool WalkUpFromStmt(Stmt *S) {
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::UnknownStatement);
+ new (allocator()) syntax::UnknownStatement, S);
return true;
}
@@ -386,27 +661,28 @@ public:
// RAV traverses it as a statement, we produce invalid node kinds in that
// case.
// FIXME: should do this in RAV instead?
- if (S->getInit() && !TraverseStmt(S->getInit()))
- return false;
- if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
- return false;
- if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
- return false;
- if (S->getBody() && !TraverseStmt(S->getBody()))
- return false;
- return true;
+ bool Result = [&, this]() {
+ if (S->getInit() && !TraverseStmt(S->getInit()))
+ return false;
+ if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
+ return false;
+ if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
+ return false;
+ if (S->getBody() && !TraverseStmt(S->getBody()))
+ return false;
+ return true;
+ }();
+ WalkUpFromCXXForRangeStmt(S);
+ return Result;
}
bool TraverseStmt(Stmt *S) {
if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
// We want to consume the semicolon, make sure SimpleDeclaration does not.
for (auto *D : DS->decls())
- Builder.noticeDeclaratorWithoutSemicolon(D);
+ Builder.noticeDeclWithoutSemicolon(D);
} else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
- // (!) do not recurse into subexpressions.
- // we do not have syntax trees for expressions yet, so we only want to see
- // the first top-level expression.
- return WalkUpFromExpr(E->IgnoreImplicit());
+ return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit());
}
return RecursiveASTVisitor::TraverseStmt(S);
}
@@ -415,19 +691,306 @@ public:
bool WalkUpFromExpr(Expr *E) {
assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
Builder.foldNode(Builder.getExprRange(E),
- new (allocator()) syntax::UnknownExpression);
+ new (allocator()) syntax::UnknownExpression, E);
+ return true;
+ }
+
+ syntax::NestedNameSpecifier *
+ BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) {
+ if (!QualifierLoc)
+ return nullptr;
+ for (auto it = QualifierLoc; it; it = it.getPrefix()) {
+ auto *NS = new (allocator()) syntax::NameSpecifier;
+ Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr);
+ Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier);
+ }
+ auto *NNS = new (allocator()) syntax::NestedNameSpecifier;
+ Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS,
+ nullptr);
+ return NNS;
+ }
+
+ bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) {
+ // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node
+ // referencing the location of the UDL suffix (`_w` in `1.2_w`). The
+ // UDL suffix location does not point to the beginning of a token, so we
+ // can't represent the UDL suffix as a separate syntax tree node.
+
+ return WalkUpFromUserDefinedLiteral(S);
+ }
+
+ syntax::UserDefinedLiteralExpression *
+ buildUserDefinedLiteral(UserDefinedLiteral *S) {
+ switch (S->getLiteralOperatorKind()) {
+ case clang::UserDefinedLiteral::LOK_Integer:
+ return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Floating:
+ return new (allocator()) syntax::FloatUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Character:
+ return new (allocator()) syntax::CharUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_String:
+ return new (allocator()) syntax::StringUserDefinedLiteralExpression;
+ case clang::UserDefinedLiteral::LOK_Raw:
+ case clang::UserDefinedLiteral::LOK_Template:
+ // For raw literal operator and numeric literal operator template we
+ // cannot get the type of the operand in the semantic AST. We get this
+ // information from the token. As integer and floating point have the same
+ // token kind, we run `NumericLiteralParser` again to distinguish them.
+ auto TokLoc = S->getBeginLoc();
+ auto TokSpelling =
+ Builder.findToken(TokLoc)->text(Context.getSourceManager());
+ auto Literal =
+ NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(),
+ Context.getLangOpts(), Context.getTargetInfo(),
+ Context.getDiagnostics());
+ if (Literal.isIntegerLiteral())
+ return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;
+ else {
+ assert(Literal.isFloatingLiteral());
+ return new (allocator()) syntax::FloatUserDefinedLiteralExpression;
+ }
+ }
+ llvm_unreachable("Unknown literal operator kind.");
+ }
+
+ bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {
+ Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S);
+ return true;
+ }
+
+ bool WalkUpFromDeclRefExpr(DeclRefExpr *S) {
+ if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc()))
+ Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier);
+
+ auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId;
+ // Get `UnqualifiedId` from `DeclRefExpr`.
+ // FIXME: Extract this logic so that it can be used by `MemberExpr`,
+  // and other semantic constructs; for now it is tied to `DeclRefExpr`.
+ if (!S->hasExplicitTemplateArgs()) {
+ Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()),
+ unqualifiedId, nullptr);
+ } else {
+ auto templateIdSourceRange =
+ SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc());
+ Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId,
+ nullptr);
+ }
+ Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id);
+
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::IdExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromParenExpr(ParenExpr *S) {
+ Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen);
+ Builder.markExprChild(S->getSubExpr(),
+ syntax::NodeRole::ParenExpression_subExpression);
+ Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::ParenExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromIntegerLiteral(IntegerLiteral *S) {
+ Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::IntegerLiteralExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromCharacterLiteral(CharacterLiteral *S) {
+ Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::CharacterLiteralExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromFloatingLiteral(FloatingLiteral *S) {
+ Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::FloatingLiteralExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromStringLiteral(StringLiteral *S) {
+ Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::StringLiteralExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) {
+ Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::BoolLiteralExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) {
+ Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::CxxNullPtrExpression, S);
+ return true;
+ }
+
+ bool WalkUpFromUnaryOperator(UnaryOperator *S) {
+ Builder.markChildToken(S->getOperatorLoc(),
+ syntax::NodeRole::OperatorExpression_operatorToken);
+ Builder.markExprChild(S->getSubExpr(),
+ syntax::NodeRole::UnaryOperatorExpression_operand);
+
+ if (S->isPostfix())
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::PostfixUnaryOperatorExpression,
+ S);
+ else
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::PrefixUnaryOperatorExpression,
+ S);
+
+ return true;
+ }
+
+ bool WalkUpFromBinaryOperator(BinaryOperator *S) {
+ Builder.markExprChild(
+ S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide);
+ Builder.markChildToken(S->getOperatorLoc(),
+ syntax::NodeRole::OperatorExpression_operatorToken);
+ Builder.markExprChild(
+ S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::BinaryOperatorExpression, S);
return true;
}
+ bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
+ if (getOperatorNodeKind(*S) ==
+ syntax::NodeKind::PostfixUnaryOperatorExpression) {
+ // A postfix unary operator is declared as taking two operands. The
+ // second operand is used to distinguish from its prefix counterpart. In
+ // the semantic AST this "phantom" operand is represented as a
+ // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this
+ // operand because it does not correspond to anything written in source
+      // code.
+ for (auto *child : S->children()) {
+ if (child->getSourceRange().isInvalid())
+ continue;
+ if (!TraverseStmt(child))
+ return false;
+ }
+ return WalkUpFromCXXOperatorCallExpr(S);
+ } else
+ return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S);
+ }
+
+ bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
+ switch (getOperatorNodeKind(*S)) {
+ case syntax::NodeKind::BinaryOperatorExpression:
+ Builder.markExprChild(
+ S->getArg(0),
+ syntax::NodeRole::BinaryOperatorExpression_leftHandSide);
+ Builder.markChildToken(
+ S->getOperatorLoc(),
+ syntax::NodeRole::OperatorExpression_operatorToken);
+ Builder.markExprChild(
+ S->getArg(1),
+ syntax::NodeRole::BinaryOperatorExpression_rightHandSide);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::BinaryOperatorExpression, S);
+ return true;
+ case syntax::NodeKind::PrefixUnaryOperatorExpression:
+ Builder.markChildToken(
+ S->getOperatorLoc(),
+ syntax::NodeRole::OperatorExpression_operatorToken);
+ Builder.markExprChild(S->getArg(0),
+ syntax::NodeRole::UnaryOperatorExpression_operand);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::PrefixUnaryOperatorExpression,
+ S);
+ return true;
+ case syntax::NodeKind::PostfixUnaryOperatorExpression:
+ Builder.markChildToken(
+ S->getOperatorLoc(),
+ syntax::NodeRole::OperatorExpression_operatorToken);
+ Builder.markExprChild(S->getArg(0),
+ syntax::NodeRole::UnaryOperatorExpression_operand);
+ Builder.foldNode(Builder.getExprRange(S),
+ new (allocator()) syntax::PostfixUnaryOperatorExpression,
+ S);
+ return true;
+ case syntax::NodeKind::UnknownExpression:
+ return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S);
+ default:
+ llvm_unreachable("getOperatorNodeKind() does not return this value");
+ }
+ }
+
bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {
- auto Tokens = Builder.getRange(S);
+ auto Tokens = Builder.getDeclarationRange(S);
if (Tokens.front().kind() == tok::coloncolon) {
// Handle nested namespace definitions. Those start at '::' token, e.g.
// namespace a^::b {}
// FIXME: build corresponding nodes for the name of this namespace.
return true;
}
- Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition);
+ Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S);
+ return true;
+ }
+
+ bool TraverseParenTypeLoc(ParenTypeLoc L) {
+    // We reverse the order of traversal to get the proper syntax structure.
+ if (!WalkUpFromParenTypeLoc(L))
+ return false;
+ return TraverseTypeLoc(L.getInnerLoc());
+ }
+
+ bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {
+ Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
+ Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
+ Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),
+ new (allocator()) syntax::ParenDeclarator, L);
+ return true;
+ }
+
+  // Declarator chunks; they are produced by TypeLocs and some clang::Decls.
+ bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {
+ Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);
+ Builder.markExprChild(L.getSizeExpr(),
+ syntax::NodeRole::ArraySubscript_sizeExpression);
+ Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);
+ Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),
+ new (allocator()) syntax::ArraySubscript, L);
+ return true;
+ }
+
+ bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {
+ Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
+ for (auto *P : L.getParams()) {
+ Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter);
+ }
+ Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
+ Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),
+ new (allocator()) syntax::ParametersAndQualifiers, L);
+ return true;
+ }
+
+ bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {
+ if (!L.getTypePtr()->hasTrailingReturn())
+ return WalkUpFromFunctionTypeLoc(L);
+
+ auto *TrailingReturnTokens = BuildTrailingReturn(L);
+ // Finish building the node for parameters.
+ Builder.markChild(TrailingReturnTokens,
+ syntax::NodeRole::ParametersAndQualifiers_trailingReturn);
+ return WalkUpFromFunctionTypeLoc(L);
+ }
+
+ bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
+ auto SR = L.getLocalSourceRange();
+ Builder.foldNode(Builder.getRange(SR),
+ new (allocator()) syntax::MemberPointer, L);
return true;
}
@@ -436,13 +999,13 @@ public:
// and fold resulting nodes.
bool WalkUpFromDeclStmt(DeclStmt *S) {
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::DeclarationStatement);
+ new (allocator()) syntax::DeclarationStatement, S);
return true;
}
bool WalkUpFromNullStmt(NullStmt *S) {
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::EmptyStatement);
+ new (allocator()) syntax::EmptyStatement, S);
return true;
}
@@ -451,7 +1014,7 @@ public:
syntax::NodeRole::IntroducerKeyword);
Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::SwitchStatement);
+ new (allocator()) syntax::SwitchStatement, S);
return true;
}
@@ -461,7 +1024,7 @@ public:
Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::CaseStatement);
+ new (allocator()) syntax::CaseStatement, S);
return true;
}
@@ -470,7 +1033,7 @@ public:
syntax::NodeRole::IntroducerKeyword);
Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::DefaultStatement);
+ new (allocator()) syntax::DefaultStatement, S);
return true;
}
@@ -483,7 +1046,7 @@ public:
Builder.markStmtChild(S->getElse(),
syntax::NodeRole::IfStatement_elseStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::IfStatement);
+ new (allocator()) syntax::IfStatement, S);
return true;
}
@@ -491,7 +1054,7 @@ public:
Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::ForStatement);
+ new (allocator()) syntax::ForStatement, S);
return true;
}
@@ -500,7 +1063,7 @@ public:
syntax::NodeRole::IntroducerKeyword);
Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::WhileStatement);
+ new (allocator()) syntax::WhileStatement, S);
return true;
}
@@ -508,7 +1071,7 @@ public:
Builder.markChildToken(S->getContinueLoc(),
syntax::NodeRole::IntroducerKeyword);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::ContinueStatement);
+ new (allocator()) syntax::ContinueStatement, S);
return true;
}
@@ -516,7 +1079,7 @@ public:
Builder.markChildToken(S->getBreakLoc(),
syntax::NodeRole::IntroducerKeyword);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::BreakStatement);
+ new (allocator()) syntax::BreakStatement, S);
return true;
}
@@ -526,7 +1089,7 @@ public:
Builder.markExprChild(S->getRetValue(),
syntax::NodeRole::ReturnStatement_value);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::ReturnStatement);
+ new (allocator()) syntax::ReturnStatement, S);
return true;
}
@@ -534,13 +1097,13 @@ public:
Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
Builder.foldNode(Builder.getStmtRange(S),
- new (allocator()) syntax::RangeBasedForStatement);
+ new (allocator()) syntax::RangeBasedForStatement, S);
return true;
}
bool WalkUpFromEmptyDecl(EmptyDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::EmptyDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::EmptyDeclaration, S);
return true;
}
@@ -549,76 +1112,175 @@ public:
syntax::NodeRole::StaticAssertDeclaration_condition);
Builder.markExprChild(S->getMessage(),
syntax::NodeRole::StaticAssertDeclaration_message);
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::StaticAssertDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::StaticAssertDeclaration, S);
return true;
}
bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::LinkageSpecificationDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::LinkageSpecificationDeclaration,
+ S);
return true;
}
bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::NamespaceAliasDefinition);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::NamespaceAliasDefinition, S);
return true;
}
bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::UsingNamespaceDirective);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::UsingNamespaceDirective, S);
return true;
}
bool WalkUpFromUsingDecl(UsingDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::UsingDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::UsingDeclaration, S);
return true;
}
bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::UsingDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::UsingDeclaration, S);
return true;
}
bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::UsingDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::UsingDeclaration, S);
return true;
}
bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {
- Builder.foldNode(Builder.getRange(S),
- new (allocator()) syntax::TypeAliasDeclaration);
+ Builder.foldNode(Builder.getDeclarationRange(S),
+ new (allocator()) syntax::TypeAliasDeclaration, S);
return true;
}
private:
+ template <class T> SourceLocation getQualifiedNameStart(T *D) {
+ static_assert((std::is_base_of<DeclaratorDecl, T>::value ||
+ std::is_base_of<TypedefNameDecl, T>::value),
+ "only DeclaratorDecl and TypedefNameDecl are supported.");
+
+ auto DN = D->getDeclName();
+ bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();
+ if (IsAnonymous)
+ return SourceLocation();
+
+ if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) {
+ if (DD->getQualifierLoc()) {
+ return DD->getQualifierLoc().getBeginLoc();
+ }
+ }
+
+ return D->getLocation();
+ }
+
+ SourceRange getInitializerRange(Decl *D) {
+ if (auto *V = llvm::dyn_cast<VarDecl>(D)) {
+ auto *I = V->getInit();
+ // Initializers in range-based-for are not part of the declarator
+ if (I && !V->isCXXForRangeDecl())
+ return I->getSourceRange();
+ }
+
+ return SourceRange();
+ }
+
+  /// Folds the SimpleDeclarator node (if present) and, if this is the last
+  /// declarator in the chain, also folds the SimpleDeclaration node.
+ template <class T> bool processDeclaratorAndDeclaration(T *D) {
+ SourceRange Initializer = getInitializerRange(D);
+ auto Range = getDeclaratorRange(Builder.sourceManager(),
+ D->getTypeSourceInfo()->getTypeLoc(),
+ getQualifiedNameStart(D), Initializer);
+
+    // There doesn't have to be a declarator (e.g. `void foo(int)` has only a
+    // declaration, but no declarator).
+ if (Range.getBegin().isValid()) {
+ auto *N = new (allocator()) syntax::SimpleDeclarator;
+ Builder.foldNode(Builder.getRange(Range), N, nullptr);
+ Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator);
+ }
+
+ if (Builder.isResponsibleForCreatingDeclaration(D)) {
+ Builder.foldNode(Builder.getDeclarationRange(D),
+ new (allocator()) syntax::SimpleDeclaration, D);
+ }
+ return true;
+ }
+
+  /// Builds the trailing return type node and returns it.
+ syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) {
+ assert(L.getTypePtr()->hasTrailingReturn());
+
+ auto ReturnedType = L.getReturnLoc();
+ // Build node for the declarator, if any.
+ auto ReturnDeclaratorRange =
+ getDeclaratorRange(this->Builder.sourceManager(), ReturnedType,
+ /*Name=*/SourceLocation(),
+ /*Initializer=*/SourceLocation());
+ syntax::SimpleDeclarator *ReturnDeclarator = nullptr;
+ if (ReturnDeclaratorRange.isValid()) {
+ ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator;
+ Builder.foldNode(Builder.getRange(ReturnDeclaratorRange),
+ ReturnDeclarator, nullptr);
+ }
+
+ // Build node for trailing return type.
+ auto Return = Builder.getRange(ReturnedType.getSourceRange());
+ const auto *Arrow = Return.begin() - 1;
+ assert(Arrow->kind() == tok::arrow);
+ auto Tokens = llvm::makeArrayRef(Arrow, Return.end());
+ Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken);
+ if (ReturnDeclarator)
+ Builder.markChild(ReturnDeclarator,
+ syntax::NodeRole::TrailingReturnType_declarator);
+ auto *R = new (allocator()) syntax::TrailingReturnType;
+ Builder.foldNode(Tokens, R, L);
+ return R;
+ }
+
+ void foldExplicitTemplateInstantiation(
+ ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW,
+ const syntax::Token *TemplateKW,
+ syntax::SimpleDeclaration *InnerDeclaration, Decl *From) {
+ assert(!ExternKW || ExternKW->kind() == tok::kw_extern);
+ assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
+ Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword);
+ Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
+ Builder.markChild(
+ InnerDeclaration,
+ syntax::NodeRole::ExplicitTemplateInstantiation_declaration);
+ Builder.foldNode(
+ Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From);
+ }
+
+ syntax::TemplateDeclaration *foldTemplateDeclaration(
+ ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW,
+ ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) {
+ assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
+ Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
+
+ auto *N = new (allocator()) syntax::TemplateDeclaration;
+ Builder.foldNode(Range, N, From);
+ Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration);
+ return N;
+ }
+
/// A small helper to save some typing.
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
syntax::TreeBuilder &Builder;
- const LangOptions &LangOpts;
+ const ASTContext &Context;
};
} // namespace
-void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
- syntax::Tree *New) {
- Pending.foldChildren(Arena, Range, New);
-}
-
-void syntax::TreeBuilder::noticeDeclaratorRange(
- llvm::ArrayRef<syntax::Token> Range) {
- if (Pending.extendDelayedFold(Range))
- return;
- Pending.foldChildrenDelayed(Range,
- new (allocator()) syntax::SimpleDeclaration);
-}
-
-void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) {
+void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) {
DeclsWithoutSemicolons.insert(D);
}
@@ -628,31 +1290,55 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
Pending.assignRole(*findToken(Loc), Role);
}
+void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) {
+ if (!T)
+ return;
+ Pending.assignRole(*T, R);
+}
+
+void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) {
+ assert(N);
+ setRole(N, R);
+}
+
+void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) {
+ auto *SN = Mapping.find(N);
+ assert(SN != nullptr);
+ setRole(SN, R);
+}
+
void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
if (!Child)
return;
- auto Range = getStmtRange(Child);
- // This is an expression in a statement position, consume the trailing
- // semicolon and form an 'ExpressionStatement' node.
- if (auto *E = dyn_cast<Expr>(Child)) {
- Pending.assignRole(getExprRange(E),
- NodeRole::ExpressionStatement_expression);
- // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
- Pending.foldChildren(Arena, Range,
- new (allocator()) syntax::ExpressionStatement);
- }
- Pending.assignRole(Range, Role);
+ syntax::Tree *ChildNode;
+ if (Expr *ChildExpr = dyn_cast<Expr>(Child)) {
+ // This is an expression in a statement position, consume the trailing
+ // semicolon and form an 'ExpressionStatement' node.
+ markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression);
+ ChildNode = new (allocator()) syntax::ExpressionStatement;
+ // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
+ Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
+ } else {
+ ChildNode = Mapping.find(Child);
+ }
+ assert(ChildNode != nullptr);
+ setRole(ChildNode, Role);
}
void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
if (!Child)
return;
+ Child = Child->IgnoreImplicit();
- Pending.assignRole(getExprRange(Child), Role);
+ syntax::Tree *ChildNode = Mapping.find(Child);
+ assert(ChildNode != nullptr);
+ setRole(ChildNode, Role);
}
const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
+ if (L.isInvalid())
+ return nullptr;
auto It = LocationToToken.find(L.getRawEncoding());
assert(It != LocationToToken.end());
return It->second;
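The WalkUpFrom* handlers in this file all follow the same protocol: children were already folded (traversal is post-order), so the handler assigns them roles via markChild/markStmtChild/markExprChild (resolved through ASTToSyntaxMapping), then folds its own node and passes the AST pointer so the mapping records it for the parent. A minimal sketch of that pattern; SomeStmt, getKeywordLoc and syntax::SomeStatement are placeholders, not real clang or syntax-tree names:

  // Hypothetical handler, illustrative only.
  bool WalkUpFromSomeStmt(SomeStmt *S) {
    // Leaf tokens get their roles directly, by source location.
    Builder.markChildToken(S->getKeywordLoc(), syntax::NodeRole::IntroducerKeyword);
    // The body was folded earlier; markStmtChild looks up its syntax node in
    // the AST-to-syntax mapping and assigns the role.
    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
    // foldNode attaches all marked children and records S -> new node, so the
    // parent handler can mark S in turn.
    Builder.foldNode(Builder.getStmtRange(S),
                     new (allocator()) syntax::SomeStatement, S);
    return true;
  }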
diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp
index 72458528202e5..24048b297a112 100644
--- a/clang/lib/Tooling/Syntax/Mutations.cpp
+++ b/clang/lib/Tooling/Syntax/Mutations.cpp
@@ -35,7 +35,7 @@ public:
assert(!New->isDetached());
assert(Role != NodeRole::Detached);
- New->Role = static_cast<unsigned>(Role);
+ New->setRole(Role);
auto *P = Anchor->parent();
P->replaceChildRangeLowLevel(Anchor, Anchor, New);
diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 5b0c5107c134c..2435ae0a91dd6 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -18,6 +18,38 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "TranslationUnit";
case NodeKind::UnknownExpression:
return OS << "UnknownExpression";
+ case NodeKind::ParenExpression:
+ return OS << "ParenExpression";
+ case NodeKind::IntegerLiteralExpression:
+ return OS << "IntegerLiteralExpression";
+ case NodeKind::CharacterLiteralExpression:
+ return OS << "CharacterLiteralExpression";
+ case NodeKind::FloatingLiteralExpression:
+ return OS << "FloatingLiteralExpression";
+ case NodeKind::StringLiteralExpression:
+ return OS << "StringLiteralExpression";
+ case NodeKind::BoolLiteralExpression:
+ return OS << "BoolLiteralExpression";
+ case NodeKind::CxxNullPtrExpression:
+ return OS << "CxxNullPtrExpression";
+ case NodeKind::IntegerUserDefinedLiteralExpression:
+ return OS << "IntegerUserDefinedLiteralExpression";
+ case NodeKind::FloatUserDefinedLiteralExpression:
+ return OS << "FloatUserDefinedLiteralExpression";
+ case NodeKind::CharUserDefinedLiteralExpression:
+ return OS << "CharUserDefinedLiteralExpression";
+ case NodeKind::StringUserDefinedLiteralExpression:
+ return OS << "StringUserDefinedLiteralExpression";
+ case NodeKind::PrefixUnaryOperatorExpression:
+ return OS << "PrefixUnaryOperatorExpression";
+ case NodeKind::PostfixUnaryOperatorExpression:
+ return OS << "PostfixUnaryOperatorExpression";
+ case NodeKind::BinaryOperatorExpression:
+ return OS << "BinaryOperatorExpression";
+ case NodeKind::UnqualifiedId:
+ return OS << "UnqualifiedId";
+ case NodeKind::IdExpression:
+ return OS << "IdExpression";
case NodeKind::UnknownStatement:
return OS << "UnknownStatement";
case NodeKind::DeclarationStatement:
@@ -58,6 +90,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "LinkageSpecificationDeclaration";
case NodeKind::SimpleDeclaration:
return OS << "SimpleDeclaration";
+ case NodeKind::TemplateDeclaration:
+ return OS << "TemplateDeclaration";
+ case NodeKind::ExplicitTemplateInstantiation:
+ return OS << "ExplicitTemplateInstantiation";
case NodeKind::NamespaceDefinition:
return OS << "NamespaceDefinition";
case NodeKind::NamespaceAliasDefinition:
@@ -68,6 +104,22 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "UsingDeclaration";
case NodeKind::TypeAliasDeclaration:
return OS << "TypeAliasDeclaration";
+ case NodeKind::SimpleDeclarator:
+ return OS << "SimpleDeclarator";
+ case NodeKind::ParenDeclarator:
+ return OS << "ParenDeclarator";
+ case NodeKind::ArraySubscript:
+ return OS << "ArraySubscript";
+ case NodeKind::TrailingReturnType:
+ return OS << "TrailingReturnType";
+ case NodeKind::ParametersAndQualifiers:
+ return OS << "ParametersAndQualifiers";
+ case NodeKind::MemberPointer:
+ return OS << "MemberPointer";
+ case NodeKind::NameSpecifier:
+ return OS << "NameSpecifier";
+ case NodeKind::NestedNameSpecifier:
+ return OS << "NestedNameSpecifier";
}
llvm_unreachable("unknown node kind");
}
@@ -84,6 +136,12 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
return OS << "CloseParen";
case syntax::NodeRole::IntroducerKeyword:
return OS << "IntroducerKeyword";
+ case syntax::NodeRole::LiteralToken:
+ return OS << "LiteralToken";
+ case syntax::NodeRole::ArrowToken:
+ return OS << "ArrowToken";
+ case syntax::NodeRole::ExternKeyword:
+ return OS << "ExternKeyword";
case syntax::NodeRole::BodyStatement:
return OS << "BodyStatement";
case syntax::NodeRole::CaseStatement_value:
@@ -94,6 +152,14 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
return OS << "IfStatement_elseKeyword";
case syntax::NodeRole::IfStatement_elseStatement:
return OS << "IfStatement_elseStatement";
+ case syntax::NodeRole::OperatorExpression_operatorToken:
+ return OS << "OperatorExpression_operatorToken";
+ case syntax::NodeRole::UnaryOperatorExpression_operand:
+ return OS << "UnaryOperatorExpression_operand";
+ case syntax::NodeRole::BinaryOperatorExpression_leftHandSide:
+ return OS << "BinaryOperatorExpression_leftHandSide";
+ case syntax::NodeRole::BinaryOperatorExpression_rightHandSide:
+ return OS << "BinaryOperatorExpression_rightHandSide";
case syntax::NodeRole::ReturnStatement_value:
return OS << "ReturnStatement_value";
case syntax::NodeRole::ExpressionStatement_expression:
@@ -104,10 +170,126 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
return OS << "StaticAssertDeclaration_condition";
case syntax::NodeRole::StaticAssertDeclaration_message:
return OS << "StaticAssertDeclaration_message";
+ case syntax::NodeRole::SimpleDeclaration_declarator:
+ return OS << "SimpleDeclaration_declarator";
+ case syntax::NodeRole::TemplateDeclaration_declaration:
+ return OS << "TemplateDeclaration_declaration";
+ case syntax::NodeRole::ExplicitTemplateInstantiation_declaration:
+ return OS << "ExplicitTemplateInstantiation_declaration";
+ case syntax::NodeRole::ArraySubscript_sizeExpression:
+ return OS << "ArraySubscript_sizeExpression";
+ case syntax::NodeRole::TrailingReturnType_declarator:
+ return OS << "TrailingReturnType_declarator";
+ case syntax::NodeRole::ParametersAndQualifiers_parameter:
+ return OS << "ParametersAndQualifiers_parameter";
+ case syntax::NodeRole::ParametersAndQualifiers_trailingReturn:
+ return OS << "ParametersAndQualifiers_trailingReturn";
+ case syntax::NodeRole::IdExpression_id:
+ return OS << "IdExpression_id";
+ case syntax::NodeRole::IdExpression_qualifier:
+ return OS << "IdExpression_qualifier";
+ case syntax::NodeRole::NestedNameSpecifier_specifier:
+ return OS << "NestedNameSpecifier_specifier";
+ case syntax::NodeRole::ParenExpression_subExpression:
+ return OS << "ParenExpression_subExpression";
}
llvm_unreachable("invalid role");
}
+std::vector<syntax::NameSpecifier *> syntax::NestedNameSpecifier::specifiers() {
+ std::vector<syntax::NameSpecifier *> Children;
+ for (auto *C = firstChild(); C; C = C->nextSibling()) {
+ assert(C->role() == syntax::NodeRole::NestedNameSpecifier_specifier);
+ Children.push_back(llvm::cast<syntax::NameSpecifier>(C));
+ }
+ return Children;
+}
+
+syntax::NestedNameSpecifier *syntax::IdExpression::qualifier() {
+ return llvm::cast_or_null<syntax::NestedNameSpecifier>(
+ findChild(syntax::NodeRole::IdExpression_qualifier));
+}
+
+syntax::UnqualifiedId *syntax::IdExpression::unqualifiedId() {
+ return llvm::cast_or_null<syntax::UnqualifiedId>(
+ findChild(syntax::NodeRole::IdExpression_id));
+}
+
+syntax::Leaf *syntax::ParenExpression::openParen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OpenParen));
+}
+
+syntax::Expression *syntax::ParenExpression::subExpression() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::ParenExpression_subExpression));
+}
+
+syntax::Leaf *syntax::ParenExpression::closeParen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::CloseParen));
+}
+
+syntax::Leaf *syntax::IntegerLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::CharacterLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::FloatingLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::StringLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::BoolLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::LiteralToken));
+}
+
+syntax::Expression *syntax::BinaryOperatorExpression::lhs() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide));
+}
+
+syntax::Leaf *syntax::UnaryOperatorExpression::operatorToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OperatorExpression_operatorToken));
+}
+
+syntax::Expression *syntax::UnaryOperatorExpression::operand() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::UnaryOperatorExpression_operand));
+}
+
+syntax::Leaf *syntax::BinaryOperatorExpression::operatorToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OperatorExpression_operatorToken));
+}
+
+syntax::Expression *syntax::BinaryOperatorExpression::rhs() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::BinaryOperatorExpression_rightHandSide));
+}
+
syntax::Leaf *syntax::SwitchStatement::switchKeyword() {
return llvm::cast_or_null<syntax::Leaf>(
findChild(syntax::NodeRole::IntroducerKeyword));
@@ -226,8 +408,8 @@ syntax::Leaf *syntax::CompoundStatement::lbrace() {
std::vector<syntax::Statement *> syntax::CompoundStatement::statements() {
std::vector<syntax::Statement *> Children;
for (auto *C = firstChild(); C; C = C->nextSibling()) {
- if (C->role() == syntax::NodeRole::CompoundStatement_statement)
- Children.push_back(llvm::cast<syntax::Statement>(C));
+ assert(C->role() == syntax::NodeRole::CompoundStatement_statement);
+ Children.push_back(llvm::cast<syntax::Statement>(C));
}
return Children;
}
@@ -246,3 +428,98 @@ syntax::Expression *syntax::StaticAssertDeclaration::message() {
return llvm::cast_or_null<syntax::Expression>(
findChild(syntax::NodeRole::StaticAssertDeclaration_message));
}
+
+std::vector<syntax::SimpleDeclarator *>
+syntax::SimpleDeclaration::declarators() {
+ std::vector<syntax::SimpleDeclarator *> Children;
+ for (auto *C = firstChild(); C; C = C->nextSibling()) {
+ if (C->role() == syntax::NodeRole::SimpleDeclaration_declarator)
+ Children.push_back(llvm::cast<syntax::SimpleDeclarator>(C));
+ }
+ return Children;
+}
+
+syntax::Leaf *syntax::TemplateDeclaration::templateKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Declaration *syntax::TemplateDeclaration::declaration() {
+ return llvm::cast_or_null<syntax::Declaration>(
+ findChild(syntax::NodeRole::TemplateDeclaration_declaration));
+}
+
+syntax::Leaf *syntax::ExplicitTemplateInstantiation::templateKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::ExplicitTemplateInstantiation::externKeyword() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::ExternKeyword));
+}
+
+syntax::Declaration *syntax::ExplicitTemplateInstantiation::declaration() {
+ return llvm::cast_or_null<syntax::Declaration>(
+ findChild(syntax::NodeRole::ExplicitTemplateInstantiation_declaration));
+}
+
+syntax::Leaf *syntax::ParenDeclarator::lparen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OpenParen));
+}
+
+syntax::Leaf *syntax::ParenDeclarator::rparen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::CloseParen));
+}
+
+syntax::Leaf *syntax::ArraySubscript::lbracket() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OpenParen));
+}
+
+syntax::Expression *syntax::ArraySubscript::sizeExpression() {
+ return llvm::cast_or_null<syntax::Expression>(
+ findChild(syntax::NodeRole::ArraySubscript_sizeExpression));
+}
+
+syntax::Leaf *syntax::ArraySubscript::rbracket() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::CloseParen));
+}
+
+syntax::Leaf *syntax::TrailingReturnType::arrowToken() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::ArrowToken));
+}
+
+syntax::SimpleDeclarator *syntax::TrailingReturnType::declarator() {
+ return llvm::cast_or_null<syntax::SimpleDeclarator>(
+ findChild(syntax::NodeRole::TrailingReturnType_declarator));
+}
+
+syntax::Leaf *syntax::ParametersAndQualifiers::lparen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::OpenParen));
+}
+
+std::vector<syntax::SimpleDeclaration *>
+syntax::ParametersAndQualifiers::parameters() {
+ std::vector<syntax::SimpleDeclaration *> Children;
+ for (auto *C = firstChild(); C; C = C->nextSibling()) {
+ if (C->role() == syntax::NodeRole::ParametersAndQualifiers_parameter)
+ Children.push_back(llvm::cast<syntax::SimpleDeclaration>(C));
+ }
+ return Children;
+}
+
+syntax::Leaf *syntax::ParametersAndQualifiers::rparen() {
+ return llvm::cast_or_null<syntax::Leaf>(
+ findChild(syntax::NodeRole::CloseParen));
+}
+
+syntax::TrailingReturnType *syntax::ParametersAndQualifiers::trailingReturn() {
+ return llvm::cast_or_null<syntax::TrailingReturnType>(
+ findChild(syntax::NodeRole::ParametersAndQualifiers_trailingReturn));
+}
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index 3df1c064923a1..c6b904822b8b5 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -35,6 +35,69 @@
using namespace clang;
using namespace clang::syntax;
+namespace {
+// Finds the smallest consecutive subsequence of Toks that covers R.
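+// For illustration (not exhaustive): with tokens `a b c d` and R running from
+// the start of `b` to the start of `c`, the result is the sub-range {b, c}.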
+llvm::ArrayRef<syntax::Token>
+getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
+ const SourceManager &SM) {
+ if (R.isInvalid())
+ return {};
+ const syntax::Token *Begin =
+ llvm::partition_point(Toks, [&](const syntax::Token &T) {
+ return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
+ });
+ const syntax::Token *End =
+ llvm::partition_point(Toks, [&](const syntax::Token &T) {
+ return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
+ });
+ if (Begin > End)
+ return {};
+ return {Begin, End};
+}
+
+// Finds the smallest expansion range that contains expanded tokens First and
+// Last, e.g.:
+// #define ID(x) x
+// ID(ID(ID(a1) a2))
+// ~~ -> a1
+// ~~ -> a2
+// ~~~~~~~~~ -> a1 a2
+SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
+ const syntax::Token &Last,
+ const SourceManager &SM) {
+ SourceRange Res;
+ auto FirstLoc = First.location(), LastLoc = Last.location();
+ // Keep traversing up the spelling chain as long as tokens are part of the
+ // same expansion.
+ while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
+ auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
+ auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
+ // Stop if expansions have diverged.
+ if (ExpInfoFirst.getExpansionLocStart() !=
+ ExpInfoLast.getExpansionLocStart())
+ break;
+ // Do not continue into macro bodies.
+ if (!ExpInfoFirst.isMacroArgExpansion() ||
+ !ExpInfoLast.isMacroArgExpansion())
+ break;
+ FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
+ LastLoc = SM.getImmediateSpellingLoc(LastLoc);
+ // Update the result afterwards, as we want the tokens that triggered the
+ // expansion.
+ Res = {FirstLoc, LastLoc};
+ }
+ // Normally mapping back to expansion location here only changes FileID, as
+ // we've already found some tokens expanded from the same macro argument, and
+ // they should map to a consecutive subset of spelled tokens. Unfortunately,
+ // SourceManager::isBeforeInTranslationUnit discriminates source locations
+ // based on their FileID in addition to offsets. So even though we are
+ // referring to the same tokens, SourceManager might tell us that one is
+ // before the other if they've got different FileIDs.
+ return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
+}
+
+} // namespace
+
syntax::Token::Token(SourceLocation Location, unsigned Length,
tok::TokenKind Kind)
: Location(Location), Length(Length), Kind(Kind) {
@@ -67,7 +130,8 @@ FileRange syntax::Token::range(const SourceManager &SM,
auto F = First.range(SM);
auto L = Last.range(SM);
assert(F.file() == L.file() && "tokens from different files");
- assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens");
+ assert((F == L || F.endOffset() <= L.beginOffset()) &&
+ "wrong order of tokens");
return FileRange(F.file(), F.beginOffset(), L.endOffset());
}
@@ -120,19 +184,7 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const {
}
llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
- if (R.isInvalid())
- return {};
- const Token *Begin =
- llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
- return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
- });
- const Token *End =
- llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
- return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
- });
- if (Begin > End)
- return {};
- return {Begin, End};
+ return getTokensCovering(expandedTokens(), R, *SourceMgr);
}
CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
@@ -161,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
// Our token could only be produced by the previous mapping.
if (It == File.Mappings.begin()) {
// No previous mapping, no need to modify offsets.
- return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
+ return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
+ /*Mapping=*/nullptr};
}
--It; // 'It' now points to last mapping that started before our token.
// Check if the token is part of the mapping.
if (ExpandedIndex < It->EndExpanded)
- return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
+ return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
// Not part of the mapping, use the index from previous mapping to compute the
// corresponding spelled token.
return {
&File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
- /*Mapping*/ nullptr};
+ /*Mapping=*/nullptr};
+}
+
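+// Returns the last mapping in F that starts at or before the given spelled
+// token, or nullptr if there is no such mapping.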
+const TokenBuffer::Mapping *
+TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
+ const syntax::Token *Spelled) {
+ assert(F.SpelledTokens.data() <= Spelled);
+ unsigned SpelledI = Spelled - F.SpelledTokens.data();
+ assert(SpelledI < F.SpelledTokens.size());
+
+ auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
+ return M.BeginSpelled <= SpelledI;
+ });
+ if (It == F.Mappings.begin())
+ return nullptr;
+ --It;
+ return &*It;
+}
+
+llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
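+ // Sketch of the mapping, assuming `#define FOO 1 2` and file tokens
+ // `a FOO b`: the spelled token `FOO` maps to the expanded tokens `1 2`,
+ // while the spelled token `a` maps to the single expanded token `a`.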
+ if (Spelled.empty())
+ return {};
+ assert(Spelled.front().location().isFileID());
+
+ auto FID = sourceManager().getFileID(Spelled.front().location());
+ auto It = Files.find(FID);
+ assert(It != Files.end());
+
+ const MarkedFile &File = It->second;
+ // `Spelled` must be a subrange of `File.SpelledTokens`.
+ assert(File.SpelledTokens.data() <= Spelled.data());
+ assert(&Spelled.back() <=
+ File.SpelledTokens.data() + File.SpelledTokens.size());
+#ifndef NDEBUG
+ auto T1 = Spelled.back().location();
+ auto T2 = File.SpelledTokens.back().location();
+ assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
+#endif
+
+ auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
+ unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
+ assert(SpelledFrontI < File.SpelledTokens.size());
+ unsigned ExpandedBegin;
+ if (!FrontMapping) {
+ // No mapping starts before the first token of Spelled; we don't have to
+ // modify offsets.
+ ExpandedBegin = File.BeginExpanded + SpelledFrontI;
+ } else if (SpelledFrontI < FrontMapping->EndSpelled) {
+ // This mapping applies to Spelled tokens.
+ if (SpelledFrontI != FrontMapping->BeginSpelled) {
+ // Spelled tokens don't cover the entire mapping; return an empty result.
+ return {}; // FIXME: support macro arguments.
+ }
+ // Spelled tokens start at the beginning of this mapping.
+ ExpandedBegin = FrontMapping->BeginExpanded;
+ } else {
+ // Spelled tokens start after the mapping ends (they start in the hole
+ // between 2 mappings, or between a mapping and end of the file).
+ ExpandedBegin =
+ FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
+ }
+
+ auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
+ unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
+ unsigned ExpandedEnd;
+ if (!BackMapping) {
+ // No mapping starts before the last token of Spelled; we don't have to
+ // modify offsets.
+ ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
+ } else if (SpelledBackI < BackMapping->EndSpelled) {
+ // This mapping applies to Spelled tokens.
+ if (SpelledBackI + 1 != BackMapping->EndSpelled) {
+ // Spelled tokens don't cover the entire mapping; return an empty result.
+ return {}; // FIXME: support macro arguments.
+ }
+ ExpandedEnd = BackMapping->EndExpanded;
+ } else {
+ // Spelled tokens end after the mapping ends.
+ ExpandedEnd =
+ BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
+ }
+
+ assert(ExpandedBegin < ExpandedTokens.size());
+ assert(ExpandedEnd < ExpandedTokens.size());
+ // Avoid returning empty ranges.
+ if (ExpandedBegin == ExpandedEnd)
+ return {};
+ return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
+ ExpandedTokens.data() + ExpandedEnd)};
}
llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
@@ -182,9 +324,20 @@ llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
return It->second.SpelledTokens;
}
+const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
+ assert(Loc.isFileID());
+ const auto *Tok = llvm::partition_point(
+ spelledTokens(SourceMgr->getFileID(Loc)),
+ [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
+ if (!Tok || Tok->location() != Loc)
+ return nullptr;
+ return Tok;
+}
+
std::string TokenBuffer::Mapping::str() const {
- return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
- BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
+ return std::string(
+ llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
+ BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
}
llvm::Optional<llvm::ArrayRef<syntax::Token>>
@@ -194,8 +347,6 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
if (Expanded.empty())
return llvm::None;
- // FIXME: also allow changes uniquely mapping to macro arguments.
-
const syntax::Token *BeginSpelled;
const Mapping *BeginMapping;
std::tie(BeginSpelled, BeginMapping) =
@@ -213,12 +364,28 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
const MarkedFile &File = Files.find(FID)->second;
- // Do not allow changes that cross macro expansion boundaries.
+ // If both tokens come from a macro argument expansion, try to map to the
+ // smallest part of the macro argument. The BeginMapping && LastMapping check
+ // is only for performance; their presence is a prerequisite for
+ // Expanded.front() and Expanded.back() being part of a macro arg expansion.
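+ // For illustration, assuming `#define ID(X) X` and a use `ID(a)`: the
+ // expanded token `a` is a macro argument expansion and maps back to the
+ // spelled `a` between the parentheses rather than to the whole `ID(a)`.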
+ if (BeginMapping && LastMapping &&
+ SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
+ SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
+ auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
+ Expanded.back(), *SourceMgr);
+ // It might be the case that the tokens are arguments of different macro
+ // calls; in that case we should continue with the logic below instead of
+ // returning an empty range.
+ if (CommonRange.isValid())
+ return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
+ }
+
+ // Do not allow changes that don't cover a full expansion.
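+ // E.g. with `#define FOO x y`, a request covering only the expanded `x` does
+ // not span FOO's whole expansion, so there is no exact spelled counterpart
+ // and we return llvm::None (illustrative example).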
unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
- if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
+ if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
return llvm::None;
- if (LastMapping && EndExpanded < LastMapping->EndExpanded)
+ if (LastMapping && LastMapping->EndExpanded != EndExpanded)
return llvm::None;
// All is good, return the result.
return llvm::makeArrayRef(
@@ -253,24 +420,30 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
ExpandedTokens.data() + M->EndExpanded);
return E;
}
-
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
- const syntax::TokenBuffer &Tokens) {
+ llvm::ArrayRef<syntax::Token> Tokens) {
assert(Loc.isFileID());
- llvm::ArrayRef<syntax::Token> All =
- Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc));
+
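+ // For illustration: with the expression `a+b` (no whitespace) and Loc at the
+ // location of `+`, `a` ends at Loc and `+` starts at Loc, so both are
+ // touching and {`a`, `+`} is returned.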
auto *Right = llvm::partition_point(
- All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
- bool AcceptRight = Right != All.end() && Right->location() <= Loc;
- bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc;
+ Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
+ bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
+ bool AcceptLeft =
+ Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
Right + (AcceptRight ? 1 : 0));
}
+llvm::ArrayRef<syntax::Token>
+syntax::spelledTokensTouching(SourceLocation Loc,
+ const syntax::TokenBuffer &Tokens) {
+ return spelledTokensTouching(
+ Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
+}
+
const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
- const syntax::TokenBuffer &Tokens) {
+ llvm::ArrayRef<syntax::Token> Tokens) {
for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
if (Tok.kind() == tok::identifier)
return &Tok;
@@ -278,6 +451,13 @@ syntax::spelledIdentifierTouching(SourceLocation Loc,
return nullptr;
}
+const syntax::Token *
+syntax::spelledIdentifierTouching(SourceLocation Loc,
+ const syntax::TokenBuffer &Tokens) {
+ return spelledIdentifierTouching(
+ Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
+}
+
std::vector<const syntax::Token *>
TokenBuffer::macroExpansions(FileID FID) const {
auto FileIt = Files.find(FID);
@@ -293,7 +473,8 @@ TokenBuffer::macroExpansions(FileID FID) const {
return Expansions;
}
-std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
+std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
+ const SourceManager &SM,
const LangOptions &LO) {
std::vector<syntax::Token> Tokens;
IdentifierTable Identifiers(LO);
@@ -308,18 +489,28 @@ std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
Tokens.push_back(syntax::Token(T));
};
- Lexer L(FID, SM.getBuffer(FID), SM, LO);
+ auto SrcBuffer = SM.getBufferData(FR.file());
+ Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
+ SrcBuffer.data() + FR.beginOffset(),
+ // We can't make BufEnd point to FR.endOffset, as Lexer requires a
+ // null-terminated buffer.
+ SrcBuffer.data() + SrcBuffer.size());
clang::Token T;
- while (!L.LexFromRawLexer(T))
+ while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
AddToken(T);
- // 'eof' is only the last token if the input is null-terminated. Never store
- // it, for consistency.
- if (T.getKind() != tok::eof)
+ // LexFromRawLexer returns true when it parses the last token of the file; add
+ // it iff it starts within the range we are interested in.
+ if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
AddToken(T);
return Tokens;
}
+std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
+ const LangOptions &LO) {
+ return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
+}
+
/// Records information required to construct mappings for the token buffer that
/// we are collecting.
class TokenCollector::CollectPPExpansions : public PPCallbacks {
@@ -335,14 +526,38 @@ public:
SourceRange Range, const MacroArgs *Args) override {
if (!Collector)
return;
- // Only record top-level expansions, not those where:
+ const auto &SM = Collector->PP.getSourceManager();
+ // Only record top-level expansions that directly produce expanded tokens.
+ // This excludes those where:
// - the macro use is inside a macro body,
// - the macro appears in an argument to another macro.
- if (!MacroNameTok.getLocation().isFileID() ||
- (LastExpansionEnd.isValid() &&
- Collector->PP.getSourceManager().isBeforeInTranslationUnit(
- Range.getBegin(), LastExpansionEnd)))
+ // However, macro expansion isn't really a tree; it's token rewrite rules,
+ // so there are other cases, e.g.
+ // #define B(X) X
+ // #define A 1 + B
+ // A(2)
+ // Both A and B produce expanded tokens, though the macro name 'B' comes
+ // from an expansion. The best we can do is merge the mappings for both.
+
+ // The *last* token of any top-level macro expansion must be in a file.
+ // (In the example above, see the closing paren of the expansion of B).
+ if (!Range.getEnd().isFileID())
+ return;
+ // If there's a current expansion that encloses this one, this one can't be
+ // top-level.
+ if (LastExpansionEnd.isValid() &&
+ !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
return;
+
+ // If the macro invocation (B) starts in a macro (A) but ends in a file,
+ // we'll create a merged mapping for A + B by overwriting the endpoint for
+ // A's startpoint.
+ if (!Range.getBegin().isFileID()) {
+ Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
+ assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
+ "Overlapping macros should have same expansion location");
+ }
+
Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
LastExpansionEnd = Range.getEnd();
}
@@ -399,197 +614,180 @@ public:
}
TokenBuffer build() && {
- buildSpelledTokens();
-
- // Walk over expanded tokens and spelled tokens in parallel, building the
- // mappings between those using source locations.
- // To correctly recover empty macro expansions, we also take locations
- // reported to PPCallbacks::MacroExpands into account as we do not have any
- // expanded tokens with source locations to guide us.
-
- // The 'eof' token is special, it is not part of spelled token stream. We
- // handle it separately at the end.
assert(!Result.ExpandedTokens.empty());
assert(Result.ExpandedTokens.back().kind() == tok::eof);
- for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
- // (!) I might be updated by the following call.
- processExpandedToken(I);
- }
- // 'eof' not handled in the loop, do it here.
- assert(SM.getMainFileID() ==
- SM.getFileID(Result.ExpandedTokens.back().location()));
- fillGapUntil(Result.Files[SM.getMainFileID()],
- Result.ExpandedTokens.back().location(),
- Result.ExpandedTokens.size() - 1);
- Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
+ // Tokenize every file that contributed tokens to the expanded stream.
+ buildSpelledTokens();
- // Some files might have unaccounted spelled tokens at the end, add an empty
- // mapping for those as they did not have expanded counterparts.
- fillGapsAtEndOfFiles();
+ // The expanded token stream consists of runs of tokens that came from
+ // the same source (a macro expansion, part of a file etc).
+ // Between these runs are the logical positions of spelled tokens that
+ // didn't expand to anything.
+ while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
+ // Create empty mappings for spelled tokens that expanded to nothing here.
+ // May advance NextSpelled, but NextExpanded is unchanged.
+ discard();
+ // Create mapping for a contiguous run of expanded tokens.
+ // Advances NextExpanded past the run, and NextSpelled accordingly.
+ unsigned OldPosition = NextExpanded;
+ advance();
+ if (NextExpanded == OldPosition)
+ diagnoseAdvanceFailure();
+ }
+ // If any tokens remain in any of the files, they didn't expand to anything.
+ // Create empty mappings up until the end of the file.
+ for (const auto &File : Result.Files)
+ discard(File.first);
+
+#ifndef NDEBUG
+ for (auto &pair : Result.Files) {
+ auto &mappings = pair.second.Mappings;
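+ // Sanity check: mappings must be in order and non-overlapping in both the
+ // spelled and the expanded token streams.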
+ assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
+ const TokenBuffer::Mapping &M2) {
+ return M1.BeginSpelled < M2.BeginSpelled &&
+ M1.EndSpelled < M2.EndSpelled &&
+ M1.BeginExpanded < M2.BeginExpanded &&
+ M1.EndExpanded < M2.EndExpanded;
+ }));
+ }
+#endif
return std::move(Result);
}
private:
- /// Process the next token in an expanded stream and move corresponding
- /// spelled tokens, record any mapping if needed.
- /// (!) \p I will be updated if this had to skip tokens, e.g. for macros.
- void processExpandedToken(unsigned &I) {
- auto L = Result.ExpandedTokens[I].location();
- if (L.isMacroID()) {
- processMacroExpansion(SM.getExpansionRange(L), I);
- return;
+ // Consume a sequence of spelled tokens that didn't expand to anything.
+ // In the simplest case, skips spelled tokens until finding one that produced
+ // the NextExpanded token, and creates an empty mapping for them.
+ // If Drain is provided, skips remaining tokens from that file instead.
+ void discard(llvm::Optional<FileID> Drain = llvm::None) {
+ SourceLocation Target =
+ Drain ? SM.getLocForEndOfFile(*Drain)
+ : SM.getExpansionLoc(
+ Result.ExpandedTokens[NextExpanded].location());
+ FileID File = SM.getFileID(Target);
+ const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+ auto &NextSpelled = this->NextSpelled[File];
+
+ TokenBuffer::Mapping Mapping;
+ Mapping.BeginSpelled = NextSpelled;
+ // When dropping trailing tokens from a file, the empty mapping should
+ // be positioned within the file's expanded-token range (at the end).
+ Mapping.BeginExpanded = Mapping.EndExpanded =
+ Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
+ // We may want to split into several adjacent empty mappings.
+ // FlushMapping() emits the current mapping and starts a new one.
+ auto FlushMapping = [&, this] {
+ Mapping.EndSpelled = NextSpelled;
+ if (Mapping.BeginSpelled != Mapping.EndSpelled)
+ Result.Files[File].Mappings.push_back(Mapping);
+ Mapping.BeginSpelled = NextSpelled;
+ };
+
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() < Target) {
+ // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
+ // then we want to partition our (empty) mapping.
+ // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
+ SourceLocation KnownEnd = CollectedExpansions.lookup(
+ SpelledTokens[NextSpelled].location().getRawEncoding());
+ if (KnownEnd.isValid()) {
+ FlushMapping(); // Emits [Start, NextSpelled)
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() <= KnownEnd)
+ ++NextSpelled;
+ FlushMapping(); // Emits [NextSpelled, KnownEnd]
+ // Now the loop continues and will emit (KnownEnd, Target).
+ } else {
+ ++NextSpelled;
+ }
}
- if (L.isFileID()) {
- auto FID = SM.getFileID(L);
- TokenBuffer::MarkedFile &File = Result.Files[FID];
-
- fillGapUntil(File, L, I);
+ FlushMapping();
+ }
- // Skip the token.
- assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
- "no corresponding token in the spelled stream");
- ++NextSpelled[FID];
- return;
+ // Consumes the NextExpanded token and others that are part of the same run.
+ // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
+ // (unless this is a run of file tokens, which we represent with no mapping).
+ void advance() {
+ const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
+ SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
+ FileID File = SM.getFileID(Expansion);
+ const auto &SpelledTokens = Result.Files[File].SpelledTokens;
+ auto &NextSpelled = this->NextSpelled[File];
+
+ if (Tok.location().isFileID()) {
+ // A run of file tokens continues while the expanded/spelled tokens match.
+ while (NextSpelled < SpelledTokens.size() &&
+ NextExpanded < Result.ExpandedTokens.size() &&
+ SpelledTokens[NextSpelled].location() ==
+ Result.ExpandedTokens[NextExpanded].location()) {
+ ++NextSpelled;
+ ++NextExpanded;
+ }
+ // We need no mapping for file tokens copied to the expanded stream.
+ } else {
+ // We found a new macro expansion. We should have its spelling bounds.
+ auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
+ assert(End.isValid() && "Macro expansion wasn't captured?");
+
+ // Mapping starts here...
+ TokenBuffer::Mapping Mapping;
+ Mapping.BeginExpanded = NextExpanded;
+ Mapping.BeginSpelled = NextSpelled;
+ // ... consumes spelled tokens within bounds we captured ...
+ while (NextSpelled < SpelledTokens.size() &&
+ SpelledTokens[NextSpelled].location() <= End)
+ ++NextSpelled;
+ // ... consumes expanded tokens rooted at the same expansion ...
+ while (NextExpanded < Result.ExpandedTokens.size() &&
+ SM.getExpansionLoc(
+ Result.ExpandedTokens[NextExpanded].location()) == Expansion)
+ ++NextExpanded;
+ // ... and ends here.
+ Mapping.EndExpanded = NextExpanded;
+ Mapping.EndSpelled = NextSpelled;
+ Result.Files[File].Mappings.push_back(Mapping);
}
}
- /// Skipped expanded and spelled tokens of a macro expansion that covers \p
- /// SpelledRange. Add a corresponding mapping.
- /// (!) \p I will be the index of the last token in an expansion after this
- /// function returns.
- void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) {
- auto FID = SM.getFileID(SpelledRange.getBegin());
- assert(FID == SM.getFileID(SpelledRange.getEnd()));
- TokenBuffer::MarkedFile &File = Result.Files[FID];
-
- fillGapUntil(File, SpelledRange.getBegin(), I);
-
- // Skip all expanded tokens from the same macro expansion.
- unsigned BeginExpanded = I;
- for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
- auto NextL = Result.ExpandedTokens[I + 1].location();
- if (!NextL.isMacroID() ||
- SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
- break;
+ // advance() is supposed to consume at least one token; if not, we crash.
+ void diagnoseAdvanceFailure() {
+#ifndef NDEBUG
+ // Show the failed-to-map token in context.
+ for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
+ I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
+ const char *L =
+ (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " ";
+ llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
}
- unsigned EndExpanded = I + 1;
- consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
- EndExpanded, NextSpelled[FID]);
+#endif
+ llvm_unreachable("Couldn't map expanded token to spelled tokens!");
}
/// Initializes TokenBuffer::Files and fills spelled tokens and expanded
/// ranges for each of the files.
void buildSpelledTokens() {
for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
- auto FID =
- SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location()));
+ const auto &Tok = Result.ExpandedTokens[I];
+ auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
auto It = Result.Files.try_emplace(FID);
TokenBuffer::MarkedFile &File = It.first->second;
- File.EndExpanded = I + 1;
+ // The eof token should not be considered part of the main-file's range.
+ File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
+
if (!It.second)
continue; // we have seen this file before.
-
// This is the first time we see this file.
File.BeginExpanded = I;
File.SpelledTokens = tokenize(FID, SM, LangOpts);
}
}
- void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
- unsigned ExpandedIndex, unsigned &SpelledIndex) {
- consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
- }
-
- /// Consumes spelled tokens that form a macro expansion and adds a entry to
- /// the resulting token buffer.
- /// (!) SpelledIndex is updated in-place.
- void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
- unsigned BeginExpanded, unsigned EndExpanded,
- unsigned &SpelledIndex) {
- // We need to record this mapping before continuing.
- unsigned MappingBegin = SpelledIndex;
- ++SpelledIndex;
-
- bool HitMapping =
- tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
- (void)HitMapping;
- assert(!HitMapping && "recursive macro expansion?");
-
- TokenBuffer::Mapping M;
- M.BeginExpanded = BeginExpanded;
- M.EndExpanded = EndExpanded;
- M.BeginSpelled = MappingBegin;
- M.EndSpelled = SpelledIndex;
-
- File.Mappings.push_back(M);
- }
-
- /// Consumes spelled tokens until location \p L is reached and adds a mapping
- /// covering the consumed tokens. The mapping will point to an empty expanded
- /// range at position \p ExpandedIndex.
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
- unsigned ExpandedIndex) {
- assert(L.isFileID());
- FileID FID;
- unsigned Offset;
- std::tie(FID, Offset) = SM.getDecomposedLoc(L);
-
- unsigned &SpelledIndex = NextSpelled[FID];
- unsigned MappingBegin = SpelledIndex;
- while (true) {
- auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
- if (SpelledIndex != MappingBegin) {
- TokenBuffer::Mapping M;
- M.BeginSpelled = MappingBegin;
- M.EndSpelled = SpelledIndex;
- M.BeginExpanded = M.EndExpanded = ExpandedIndex;
- File.Mappings.push_back(M);
- }
- if (!EndLoc)
- break;
- consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
- SpelledIndex);
-
- MappingBegin = SpelledIndex;
- }
- };
-
- /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
- /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
- /// (!) NextSpelled is updated in place.
- ///
- /// returns None if \p Offset was reached, otherwise returns the end location
- /// of a mapping that starts at \p NextSpelled.
- llvm::Optional<SourceLocation>
- tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
- unsigned &NextSpelled) {
- for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
- auto L = File.SpelledTokens[NextSpelled].location();
- if (Offset <= SM.getFileOffset(L))
- return llvm::None; // reached the offset we are looking for.
- auto Mapping = CollectedExpansions.find(L.getRawEncoding());
- if (Mapping != CollectedExpansions.end())
- return Mapping->second; // found a mapping before the offset.
- }
- return llvm::None; // no more tokens, we "reached" the offset.
- }
-
- /// Adds empty mappings for unconsumed spelled tokens at the end of each file.
- void fillGapsAtEndOfFiles() {
- for (auto &F : Result.Files) {
- if (F.second.SpelledTokens.empty())
- continue;
- fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
- F.second.EndExpanded);
- }
- }
-
TokenBuffer Result;
- /// For each file, a position of the next spelled token we will consume.
- llvm::DenseMap<FileID, unsigned> NextSpelled;
+ unsigned NextExpanded = 0; // cursor in ExpandedTokens
+ llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
PPExpansions CollectedExpansions;
const SourceManager &SM;
const LangOptions &LangOpts;
@@ -604,19 +802,20 @@ TokenBuffer TokenCollector::consume() && {
}
std::string syntax::Token::str() const {
- return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()),
- length());
+ return std::string(llvm::formatv("Token({0}, length = {1})",
+ tok::getTokenName(kind()), length()));
}
std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
- return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM));
+ return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
+ tok::getTokenName(kind()), length()));
}
std::string TokenBuffer::dumpForTests() const {
auto PrintToken = [this](const syntax::Token &T) -> std::string {
if (T.kind() == tok::eof)
return "<eof>";
- return T.text(*SourceMgr);
+ return std::string(T.text(*SourceMgr));
};
auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp
index 9a6270ec4cce3..37579e6145b65 100644
--- a/clang/lib/Tooling/Syntax/Tree.cpp
+++ b/clang/lib/Tooling/Syntax/Tree.cpp
@@ -58,22 +58,33 @@ bool syntax::Leaf::classof(const Node *N) {
syntax::Node::Node(NodeKind Kind)
: Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
- Role(static_cast<unsigned>(NodeRole::Detached)), Original(false),
- CanModify(false) {}
+ Role(0), Original(false), CanModify(false) {
+ this->setRole(NodeRole::Detached);
+}
bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; }
+void syntax::Node::setRole(NodeRole NR) {
+ this->Role = static_cast<unsigned>(NR);
+}
+
bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; }
void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
- assert(Child->Parent == nullptr);
- assert(Child->NextSibling == nullptr);
assert(Child->role() == NodeRole::Detached);
assert(Role != NodeRole::Detached);
+ Child->setRole(Role);
+ prependChildLowLevel(Child);
+}
+
+void syntax::Tree::prependChildLowLevel(Node *Child) {
+ assert(Child->Parent == nullptr);
+ assert(Child->NextSibling == nullptr);
+ assert(Child->role() != NodeRole::Detached);
+
Child->Parent = this;
Child->NextSibling = this->FirstChild;
- Child->Role = static_cast<unsigned>(Role);
this->FirstChild = Child;
}
@@ -94,7 +105,7 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
N != End;) {
auto *Next = N->NextSibling;
- N->Role = static_cast<unsigned>(NodeRole::Detached);
+ N->setRole(NodeRole::Detached);
N->Parent = nullptr;
N->NextSibling = nullptr;
if (N->Original)