Diffstat (limited to 'include/clang/Tooling/Syntax')
-rw-r--r--  include/clang/Tooling/Syntax/BuildTree.h  |  24
-rw-r--r--  include/clang/Tooling/Syntax/Nodes.h      |  92
-rw-r--r--  include/clang/Tooling/Syntax/Tokens.h     | 355
-rw-r--r--  include/clang/Tooling/Syntax/Tree.h       | 142
4 files changed, 613 insertions, 0 deletions
diff --git a/include/clang/Tooling/Syntax/BuildTree.h b/include/clang/Tooling/Syntax/BuildTree.h
new file mode 100644
index 000000000000..055d6462eabd
--- /dev/null
+++ b/include/clang/Tooling/Syntax/BuildTree.h
@@ -0,0 +1,24 @@
+//===- BuildTree.h - build syntax trees -----------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Functions to construct a syntax tree from an AST.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_BUILD_TREE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_BUILD_TREE_H
+
+#include "clang/AST/Decl.h"
+#include "clang/Tooling/Syntax/Nodes.h"
+
+namespace clang {
+namespace syntax {
+
+/// Build a syntax tree for the main file.
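+///
+/// An illustrative usage sketch (assumes `Ctx` is a populated clang::ASTContext
+/// and `Tokens` is a syntax::TokenBuffer collected for the same parse, e.g. by
+/// a syntax::TokenCollector):
+///   syntax::Arena A(Ctx.getSourceManager(), Ctx.getLangOpts(),
+///                   std::move(Tokens));
+///   syntax::TranslationUnit *TU =
+///       syntax::buildSyntaxTree(A, *Ctx.getTranslationUnitDecl());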
+syntax::TranslationUnit *buildSyntaxTree(Arena &A,
+ const clang::TranslationUnitDecl &TU);
+} // namespace syntax
+} // namespace clang
+#endif
diff --git a/include/clang/Tooling/Syntax/Nodes.h b/include/clang/Tooling/Syntax/Nodes.h
new file mode 100644
index 000000000000..d20c7cb7b171
--- /dev/null
+++ b/include/clang/Tooling/Syntax/Nodes.h
@@ -0,0 +1,92 @@
+//===- Nodes.h - syntax nodes for C/C++ grammar constructs ----*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Syntax tree nodes for C, C++ and Objective-C grammar constructs.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H
+#define LLVM_CLANG_TOOLING_SYNTAX_NODES_H
+
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Token.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace syntax {
+
+/// A kind of a syntax node, used for implementing casts.
+enum class NodeKind : uint16_t {
+ Leaf,
+ TranslationUnit,
+ TopLevelDeclaration,
+ CompoundStatement
+};
+/// For debugging purposes.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K);
+
+/// A relation between a parent and child node. Used for implementing accessors.
+enum class NodeRole : uint8_t {
+ // A node without a parent.
+ Detached,
+ // Children of an unknown semantic nature, e.g. skipped tokens, comments.
+ Unknown,
+ // FIXME: should this be shared for all other nodes with braces, e.g. init
+ // lists?
+ CompoundStatement_lbrace,
+ CompoundStatement_rbrace
+};
+
+/// A root node for a translation unit. Parent is always null.
+class TranslationUnit final : public Tree {
+public:
+ TranslationUnit() : Tree(NodeKind::TranslationUnit) {}
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::TranslationUnit;
+ }
+};
+
+/// FIXME: this node is temporary and will be replaced with nodes for various
+/// 'declarations' and 'declarators' from the C/C++ grammar
+///
+/// Represents any top-level declaration. Only there to give the syntax tree a
+/// bit of structure until we implement syntax nodes for declarations and
+/// declarators.
+class TopLevelDeclaration final : public Tree {
+public:
+ TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {}
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::TopLevelDeclaration;
+ }
+};
+
+/// An abstract node for C++ statements, e.g. 'while', 'if', etc.
+class Statement : public Tree {
+public:
+ Statement(NodeKind K) : Tree(K) {}
+ static bool classof(const Node *N) {
+ return NodeKind::CompoundStatement <= N->kind() &&
+ N->kind() <= NodeKind::CompoundStatement;
+ }
+};
+
+/// { statement1; statement2; … }
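+///
+/// An illustrative sketch of casting and accessor use (assumes `S` is a
+/// syntax::Statement* obtained from a traversal of the tree):
+///   if (auto *CS = llvm::dyn_cast<syntax::CompoundStatement>(S)) {
+///     syntax::Leaf *Open = CS->lbrace();   // the '{' token
+///     syntax::Leaf *Close = CS->rbrace();  // the '}' token
+///   }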
+class CompoundStatement final : public Statement {
+public:
+ CompoundStatement() : Statement(NodeKind::CompoundStatement) {}
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::CompoundStatement;
+ }
+ syntax::Leaf *lbrace();
+ syntax::Leaf *rbrace();
+};
+
+} // namespace syntax
+} // namespace clang
+#endif
diff --git a/include/clang/Tooling/Syntax/Tokens.h b/include/clang/Tooling/Syntax/Tokens.h
new file mode 100644
index 000000000000..4640ccb2d30a
--- /dev/null
+++ b/include/clang/Tooling/Syntax/Tokens.h
@@ -0,0 +1,355 @@
+//===- Tokens.h - collect tokens from preprocessing --------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Record tokens that a preprocessor emits and define operations to map between
+// the tokens written in a file and tokens produced by the preprocessor.
+//
+// When running the compiler, there are two token streams we are interested in:
+// - "spelled" tokens directly correspond to a substring written in some
+// source file.
+// - "expanded" tokens represent the result of preprocessing, parses consumes
+// this token stream to produce the AST.
+//
+// Expanded tokens correspond directly to locations found in the AST, so they
+// can be used to find the subranges of the token stream covered by various
+// AST nodes. Spelled tokens correspond directly to the source code written by
+// the user.
+//
+// To allow composing these two use cases, we also define operations that map
+// between expanded tokens and the spelled tokens that produced them (macro
+// expansions, directives, etc).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <tuple>
+
+namespace clang {
+class Preprocessor;
+
+namespace syntax {
+
+/// A half-open character range inside a particular file; the start offset is
+/// included and the end offset is excluded from the range.
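+///
+/// An illustrative sketch (assumes `SM` is a SourceManager and `Loc` is a valid
+/// file location):
+///   syntax::FileRange R(SM, Loc, /*Length=*/3);
+///   llvm::StringRef Text = R.text(SM); // the 3 characters starting at Loc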
+struct FileRange {
+ /// EXPECTS: File.isValid() && Begin <= End.
+ FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset);
+ /// EXPECTS: BeginLoc.isValid() && BeginLoc.isFileID().
+ FileRange(const SourceManager &SM, SourceLocation BeginLoc, unsigned Length);
+ /// EXPECTS: BeginLoc and EndLoc are valid file locations from the same file
+ /// and BeginLoc <= EndLoc.
+ FileRange(const SourceManager &SM, SourceLocation BeginLoc,
+ SourceLocation EndLoc);
+
+ FileID file() const { return File; }
+ /// Start offset (inclusive) in the corresponding file.
+ unsigned beginOffset() const { return Begin; }
+ /// End offset (exclusive) in the corresponding file.
+ unsigned endOffset() const { return End; }
+
+ unsigned length() const { return End - Begin; }
+
+ /// Check if \p Offset is inside the range.
+ bool contains(unsigned Offset) const {
+ return Begin <= Offset && Offset < End;
+ }
+ /// Check if \p Offset is inside the range or equal to its endpoint.
+ bool touches(unsigned Offset) const {
+ return Begin <= Offset && Offset <= End;
+ }
+
+ /// Gets the substring that this FileRange refers to.
+ llvm::StringRef text(const SourceManager &SM) const;
+
+ friend bool operator==(const FileRange &L, const FileRange &R) {
+ return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End);
+ }
+ friend bool operator!=(const FileRange &L, const FileRange &R) {
+ return !(L == R);
+ }
+
+private:
+ FileID File;
+ unsigned Begin;
+ unsigned End;
+};
+
+/// For debugging purposes.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRange &R);
+
+/// A token coming directly from a file or from a macro invocation. Has just
+/// enough information to locate the token in the source code.
+/// Can represent both expanded and spelled tokens.
+class Token {
+public:
+ Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind);
+ /// EXPECTS: clang::Token is not an annotation token.
+ explicit Token(const clang::Token &T);
+
+ tok::TokenKind kind() const { return Kind; }
+ /// Location of the first character of a token.
+ SourceLocation location() const { return Location; }
+ /// Location right after the last character of a token.
+ SourceLocation endLocation() const {
+ return Location.getLocWithOffset(Length);
+ }
+ unsigned length() const { return Length; }
+
+ /// Get the substring covered by the token. Note that this will include all
+ /// digraphs, line continuations, etc. E.g. the tokens for 'int' and
+ ///    in\
+ ///    t
+ /// both have the same kind, tok::kw_int, but the results of text() differ.
+ llvm::StringRef text(const SourceManager &SM) const;
+
+ /// Gets a range of this token.
+ /// EXPECTS: token comes from a file, not from a macro expansion.
+ FileRange range(const SourceManager &SM) const;
+
+ /// Given two tokens inside the same file, returns a file range that starts at
+ /// \p First and ends at \p Last.
+ /// EXPECTS: First and Last are file tokens from the same file, Last starts
+ /// after First.
+ static FileRange range(const SourceManager &SM, const syntax::Token &First,
+ const syntax::Token &Last);
+
+ std::string dumpForTests(const SourceManager &SM) const;
+ /// For debugging purposes.
+ std::string str() const;
+
+private:
+ SourceLocation Location;
+ unsigned Length;
+ tok::TokenKind Kind;
+};
+/// For debugging purposes. Equivalent to a call to Token::str().
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
+
+/// A list of tokens obtained by preprocessing a text buffer and operations to
+/// map between the expanded and spelled tokens, i.e. TokenBuffer has
+/// information about two token streams:
+/// 1. Expanded tokens: tokens produced by the preprocessor after all macro
+/// replacements,
+/// 2. Spelled tokens: corresponding directly to the source code of a file
+/// before any macro replacements occurred.
+/// Here's an example to illustrate the difference between the two:
+/// #define FOO 10
+/// int a = FOO;
+///
+/// Spelled tokens are {'#','define','FOO','10','int','a','=','FOO',';'}.
+/// Expanded tokens are {'int','a','=','10',';','eof'}.
+///
+/// Note that the expanded token stream has a tok::eof token at the end, while
+/// the spelled tokens never store an 'eof' token.
+///
+/// The full list of expanded tokens can be obtained with expandedTokens(). Spelled
+/// tokens for each of the files can be obtained via spelledTokens(FileID).
+///
+/// To map between the expanded and spelled tokens use findSpelledByExpanded().
+///
+/// To build a token buffer use the TokenCollector class. You can also compute
+/// the spelled tokens of a file using the tokenize() helper.
+///
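+/// An illustrative sketch of mapping tokens back to the source they were
+/// spelled in (assumes `Buffer` is a TokenBuffer and `Range` is a non-empty
+/// subrange of Buffer.expandedTokens(), e.g. the tokens covered by an AST node):
+///   if (auto Spelled = Buffer.spelledForExpanded(Range)) {
+///     auto R = syntax::Token::range(Buffer.sourceManager(), Spelled->front(),
+///                                   Spelled->back());
+///     llvm::StringRef Text = R.text(Buffer.sourceManager());
+///   }
+///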
+/// FIXME: allow mapping from spelled to expanded tokens when a use case shows up.
+/// FIXME: allow mappings into macro arguments.
+class TokenBuffer {
+public:
+ TokenBuffer(const SourceManager &SourceMgr) : SourceMgr(&SourceMgr) {}
+ /// All tokens produced by the preprocessor after all macro replacements,
+ /// directives, etc. Source locations found in the clang AST will always
+ /// point to one of these tokens.
+ /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split
+ /// into two '>' tokens by the parser. However, TokenBuffer currently
+ /// keeps it as a single '>>' token.
+ llvm::ArrayRef<syntax::Token> expandedTokens() const {
+ return ExpandedTokens;
+ }
+
+ /// Find the subrange of spelled tokens that produced the corresponding \p
+ /// Expanded tokens.
+ ///
+ /// EXPECTS: \p Expanded is a subrange of expandedTokens().
+ ///
+ /// Will fail if the expanded tokens do not correspond to a
+ /// sequence of spelled tokens. E.g. for the following example:
+ ///
+ /// #define FIRST f1 f2 f3
+ /// #define SECOND s1 s2 s3
+ ///
+ /// a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c
+ ///
+ /// the results would be:
+ /// expanded => spelled
+ /// ------------------------
+ /// a => a
+ /// s1 s2 s3 => SECOND
+ /// a f1 f2 f3 => a FIRST
+ /// a f1 => can't map
+ /// s1 s2 => can't map
+ ///
+ /// If \p Expanded is empty, the returned value is llvm::None.
+ /// Complexity is logarithmic.
+ llvm::Optional<llvm::ArrayRef<syntax::Token>>
+ spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
+
+ /// An expansion produced by the preprocessor; this includes macro expansions
+ /// and preprocessor directives. The preprocessor always maps a non-empty
+ /// range of spelled tokens to a (possibly empty) range of expanded tokens.
+ /// Here are a few examples of expansions:
+ ///    #pragma once      // Expands to an empty range.
+ ///    #define FOO 1 2 3 // Also expands to an empty range.
+ /// FOO // Expands to "1 2 3".
+ /// FIXME(ibiryukov): implement this, currently #include expansions are empty.
+ /// #include <vector> // Expands to tokens produced by the include.
+ struct Expansion {
+ llvm::ArrayRef<syntax::Token> Spelled;
+ llvm::ArrayRef<syntax::Token> Expanded;
+ };
+ /// If \p Spelled starts a mapping (e.g. if it's a macro name or a '#' that
+ /// starts a preprocessor directive), returns the subrange of expanded tokens
+ /// that the expansion produces.
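+ ///
+ /// An illustrative usage sketch (assumes `Buffer` is a TokenBuffer and `FID`
+ /// is a file it has spelled tokens for):
+ ///   for (const syntax::Token &T : Buffer.spelledTokens(FID))
+ ///     if (auto E = Buffer.expansionStartingAt(&T))
+ ///       llvm::errs() << "expansion producing " << E->Expanded.size()
+ ///                    << " expanded tokens\n";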
+ llvm::Optional<Expansion>
+ expansionStartingAt(const syntax::Token *Spelled) const;
+
+ /// Lexed tokens of a file before preprocessing. E.g. for the following input
+ /// #define DECL(name) int name = 10
+ /// DECL(a);
+ /// spelledTokens() returns
+ ///     {"#", "define", "DECL", "(", "name", ")", "int", "name", "=", "10",
+ ///      "DECL", "(", "a", ")", ";"}.
+ /// FIXME: we do not yet store tokens of directives, like #include, #define,
+ /// #pragma, etc.
+ llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;
+
+ const SourceManager &sourceManager() const { return *SourceMgr; }
+
+ std::string dumpForTests() const;
+
+private:
+ /// Describes a mapping between a contiguous subrange of spelled tokens and
+ /// expanded tokens. Represents macro expansions, preprocessor directives,
+ /// conditionally disabled pp regions, etc.
+ ///   #define FOO 1+2
+ ///   #define BAR(a) a + 1
+ ///   FOO    // invocation #1: expanded tokens = {'1','+','2'},
+ ///          //                spelled tokens = {'FOO'}.
+ ///   BAR(1) // invocation #2: expanded tokens = {'1','+','1'},
+ ///          //                spelled tokens = {'BAR','(','1',')'}.
+ struct Mapping {
+ // Positions in the spelled token stream. The corresponding range is never
+ // empty.
+ unsigned BeginSpelled = 0;
+ unsigned EndSpelled = 0;
+ // Positions in the expanded token stream. The corresponding range can be
+ // empty.
+ unsigned BeginExpanded = 0;
+ unsigned EndExpanded = 0;
+
+ /// For debugging purposes.
+ std::string str() const;
+ };
+ /// Spelled tokens of the file with information about the subranges.
+ struct MarkedFile {
+ /// Lexed, but not preprocessed, tokens of the file. These map directly to
+ /// text in the corresponding files and include tokens of all preprocessor
+ /// directives.
+ /// FIXME: spelled tokens don't change across FileID that map to the same
+ /// FileEntry. We could consider deduplicating them to save memory.
+ std::vector<syntax::Token> SpelledTokens;
+ /// A sorted list to convert between the spelled and expanded token streams.
+ std::vector<Mapping> Mappings;
+ /// The range of expanded tokens produced for this FileID.
+ unsigned BeginExpanded = 0;
+ unsigned EndExpanded = 0;
+ };
+
+ friend class TokenCollector;
+
+ /// Maps a single expanded token to its spelled counterpart or a mapping that
+ /// produced it.
+ std::pair<const syntax::Token *, const Mapping *>
+ spelledForExpandedToken(const syntax::Token *Expanded) const;
+
+ /// Token stream produced after preprocessing; conceptually this captures the
+ /// same stream as 'clang -E' (excluding the line markers that -E prints to
+ /// delimit files).
+ std::vector<syntax::Token> ExpandedTokens;
+ llvm::DenseMap<FileID, MarkedFile> Files;
+ // Never null. This is a pointer, not a reference, to avoid disabling the
+ // implicit assignment operator.
+ const SourceManager *SourceMgr;
+};
+
+/// Lex the text buffer corresponding to \p FID in raw mode and record the
+/// resulting spelled tokens. Does minimal post-processing on raw identifiers,
+/// setting the appropriate token kind (instead of the raw_identifier reported
+/// by the lexer in raw mode). This is a very low-level function; most users
+/// should prefer to use TokenCollector. Lexing in raw mode produces wildly
+/// different results from what one might expect when running a C++ frontend,
+/// e.g. the preprocessor does not run at all.
+/// The result will *not* have an 'eof' token at the end.
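+///
+/// An illustrative usage sketch (assumes `SM` and `LangOpts` describe an
+/// already loaded file `FID`):
+///   std::vector<syntax::Token> Toks = syntax::tokenize(FID, SM, LangOpts);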
+std::vector<syntax::Token> tokenize(FileID FID, const SourceManager &SM,
+ const LangOptions &LO);
+
+/// Collects tokens for the main file while running the frontend action. An
+/// instance of this object should be created in
+/// FrontendAction::BeginSourceFile() and the results should be consumed after
+/// FrontendAction::Execute() finishes.
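+///
+/// An illustrative usage sketch (assumes `CI` is a CompilerInstance that is
+/// about to run a frontend action over the main file):
+///   syntax::TokenCollector Collector(CI.getPreprocessor()); // before parsing
+///   // ... FrontendAction::Execute() runs and finishes ...
+///   syntax::TokenBuffer Tokens = std::move(Collector).consume();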
+class TokenCollector {
+public:
+ /// Adds the hooks to collect the tokens. Should be called before the
+ /// preprocessing starts, i.e. as a part of BeginSourceFile() or
+ /// CreateASTConsumer().
+ TokenCollector(Preprocessor &P);
+
+ /// Finalizes token collection. Should be called after preprocessing is
+ /// finished, i.e. after running Execute().
+ LLVM_NODISCARD TokenBuffer consume() &&;
+
+private:
+ /// Maps from a start to an end spelling location of transformations
+ /// performed by the preprocessor. These include:
+ /// 1. range from '#' to the last token in the line for PP directives,
+ /// 2. macro name and arguments for macro expansions.
+ /// Note that we record only top-level macro expansions, intermediate
+ /// expansions (e.g. inside macro arguments) are ignored.
+ ///
+ /// Used to find correct boundaries of macro calls and directives when
+ /// building mappings from spelled to expanded tokens.
+ ///
+ /// Logically, at each point of the preprocessor execution there is a stack of
+ /// macro expansions being processed and we could use it to recover the
+ /// location information we need. However, the public preprocessor API only
+ /// exposes the points when macro expansions start (when we push a macro onto
+ /// the stack) and not when they end (when we pop a macro from the stack).
+ /// To work around this limitation, we rely on source location information
+ /// stored in this map.
+ using PPExpansions = llvm::DenseMap</*SourceLocation*/ int, SourceLocation>;
+ class Builder;
+ class CollectPPExpansions;
+
+ std::vector<syntax::Token> Expanded;
+ // FIXME: we only store macro expansions; also add directives (#pragma, etc.).
+ PPExpansions Expansions;
+ Preprocessor &PP;
+ CollectPPExpansions *Collector;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif
diff --git a/include/clang/Tooling/Syntax/Tree.h b/include/clang/Tooling/Syntax/Tree.h
new file mode 100644
index 000000000000..b66777561a6a
--- /dev/null
+++ b/include/clang/Tooling/Syntax/Tree.h
@@ -0,0 +1,142 @@
+//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines the basic structure of the syntax tree. There are two kinds of nodes:
+// - leaf nodes correspond to a token in the expanded token stream,
+// - tree nodes correspond to language grammar constructs.
+//
+// The tree is initially built from an AST. Each node of a newly built tree
+// covers a contiguous subrange of expanded tokens (i.e. tokens after
+// preprocessing); the specific tokens covered are stored in the leaf nodes of
+// a tree. A post-order traversal of a tree visits the leaf nodes in the same
+// order as the original expanded token stream.
+//
+// This is still a work in progress and highly experimental; we leave ourselves
+// room to completely change the design and/or implementation.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
+#include <cstdint>
+
+namespace clang {
+namespace syntax {
+
+/// A memory arena for syntax trees. Also tracks the underlying token buffers,
+/// source manager, etc.
+class Arena {
+public:
+ Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
+ TokenBuffer Tokens);
+
+ const SourceManager &sourceManager() const { return SourceMgr; }
+ const LangOptions &langOptions() const { return LangOpts; }
+
+ const TokenBuffer &tokenBuffer() const;
+ llvm::BumpPtrAllocator &allocator() { return Allocator; }
+
+ /// Add \p Buffer to the underlying source manager, tokenize it and store the
+ /// resulting tokens. Useful when there is a need to materialize tokens that
+ /// were not written in user code.
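+ ///
+ /// An illustrative sketch (assumes `A` is an Arena; the buffer contents are
+ /// made up for the example):
+ ///   auto Buf = llvm::MemoryBuffer::getMemBufferCopy(";");
+ ///   std::pair<FileID, llvm::ArrayRef<syntax::Token>> Lexed =
+ ///       A.lexBuffer(std::move(Buf));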
+ std::pair<FileID, llvm::ArrayRef<syntax::Token>>
+ lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
+
+private:
+ SourceManager &SourceMgr;
+ const LangOptions &LangOpts;
+ TokenBuffer Tokens;
+ /// IDs and storage for additional tokenized files.
+ llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
+ /// Keeps all the allocated nodes and their intermediate data structures.
+ llvm::BumpPtrAllocator Allocator;
+};
+
+class Tree;
+class TreeBuilder;
+enum class NodeKind : uint16_t;
+enum class NodeRole : uint8_t;
+
+/// A node in a syntax tree. Each node is either a Leaf (representing a single
+/// token) or a Tree (representing a language construct).
+class Node {
+public:
+ /// Newly created nodes are detached from a tree; parent and sibling links are
+ /// set when the node is added as a child to another node.
+ Node(NodeKind Kind);
+
+ NodeKind kind() const { return static_cast<NodeKind>(Kind); }
+ NodeRole role() const { return static_cast<NodeRole>(Role); }
+
+ const Tree *parent() const { return Parent; }
+ Tree *parent() { return Parent; }
+
+ const Node *nextSibling() const { return NextSibling; }
+ Node *nextSibling() { return NextSibling; }
+
+ /// Dumps the structure of a subtree. For debugging and testing purposes.
+ std::string dump(const Arena &A) const;
+ /// Dumps the tokens forming this subtree.
+ std::string dumpTokens(const Arena &A) const;
+
+private:
+ // Tree is allowed to change the Parent link and Role.
+ friend class Tree;
+
+ Tree *Parent;
+ Node *NextSibling;
+ unsigned Kind : 16;
+ unsigned Role : 8;
+};
+
+/// A leaf node points to a single token inside the expanded token stream.
+class Leaf final : public Node {
+public:
+ Leaf(const syntax::Token *T);
+ static bool classof(const Node *N);
+
+ const syntax::Token *token() const { return Tok; }
+
+private:
+ const syntax::Token *Tok;
+};
+
+/// A node that has children and represents a syntactic language construct.
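+///
+/// An illustrative sketch of iterating over the children of a tree (assumes
+/// `T` points to a syntax::Tree and `A` is the Arena the tree was built in):
+///   for (syntax::Node *C = T->firstChild(); C != nullptr; C = C->nextSibling())
+///     llvm::errs() << C->dump(A);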
+class Tree : public Node {
+public:
+ using Node::Node;
+ static bool classof(const Node *N);
+
+ Node *firstChild() { return FirstChild; }
+ const Node *firstChild() const { return FirstChild; }
+
+protected:
+ /// Find the first child node with the role \p R.
+ syntax::Node *findChild(NodeRole R);
+
+private:
+ /// Prepend \p Child to the list of children and set the parent pointer.
+ /// A very low-level operation that does not check any invariants; only used
+ /// by TreeBuilder.
+ /// EXPECTS: Role != NodeRole::Detached.
+ void prependChildLowLevel(Node *Child, NodeRole Role);
+ friend class TreeBuilder;
+
+ Node *FirstChild = nullptr;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif