diff options
Diffstat (limited to 'lib/tre-ast.h')
-rw-r--r-- | lib/tre-ast.h | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/lib/tre-ast.h b/lib/tre-ast.h new file mode 100644 index 0000000000000..d9af3762f9acc --- /dev/null +++ b/lib/tre-ast.h @@ -0,0 +1,128 @@ +/* + tre-ast.h - Abstract syntax tree (AST) definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_AST_H +#define TRE_AST_H 1 + +#include "tre-mem.h" +#include "tre-internal.h" +#include "tre-compile.h" + +/* The different AST node types. */ +typedef enum { + LITERAL, + CATENATION, + ITERATION, + UNION +} tre_ast_type_t; + +/* Special subtypes of TRE_LITERAL. */ +#define EMPTY -1 /* Empty leaf (denotes empty string). */ +#define ASSERTION -2 /* Assertion leaf. */ +#define TAG -3 /* Tag leaf. */ +#define BACKREF -4 /* Back reference leaf. */ +#define PARAMETER -5 /* Parameter. */ + +#define IS_SPECIAL(x) ((x)->code_min < 0) +#define IS_EMPTY(x) ((x)->code_min == EMPTY) +#define IS_ASSERTION(x) ((x)->code_min == ASSERTION) +#define IS_TAG(x) ((x)->code_min == TAG) +#define IS_BACKREF(x) ((x)->code_min == BACKREF) +#define IS_PARAMETER(x) ((x)->code_min == PARAMETER) + + +/* A generic AST node. All AST nodes consist of this node on the top + level with `obj' pointing to the actual content. */ +typedef struct { + tre_ast_type_t type; /* Type of the node. */ + void *obj; /* Pointer to actual node. */ + int nullable; + int submatch_id; + int num_submatches; + int num_tags; + tre_pos_and_tags_t *firstpos; + tre_pos_and_tags_t *lastpos; +} tre_ast_node_t; + + +/* A "literal" node. These are created for assertions, back references, + tags, matching parameter settings, and all expressions that match one + character. */ +typedef struct { + long code_min; + long code_max; + int position; + union { + tre_ctype_t class; + int *params; + } u; + tre_ctype_t *neg_classes; +} tre_literal_t; + +/* A "catenation" node. These are created when two regexps are concatenated. + If there are more than one subexpressions in sequence, the `left' part + holds all but the last, and `right' part holds the last subexpression + (catenation is left associative). */ +typedef struct { + tre_ast_node_t *left; + tre_ast_node_t *right; +} tre_catenation_t; + +/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" + operators. */ +typedef struct { + /* Subexpression to match. */ + tre_ast_node_t *arg; + /* Minimum number of consecutive matches. */ + int min; + /* Maximum number of consecutive matches. */ + int max; + /* If 0, match as many characters as possible, if 1 match as few as + possible. Note that this does not always mean the same thing as + matching as many/few repetitions as possible. */ + unsigned int minimal:1; + /* Approximate matching parameters (or NULL). */ + int *params; +} tre_iteration_t; + +/* An "union" node. These are created for the "|" operator. */ +typedef struct { + tre_ast_node_t *left; + tre_ast_node_t *right; +} tre_union_t; + +tre_ast_node_t * +tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); + +tre_ast_node_t * +tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); + +tre_ast_node_t * +tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, + int minimal); + +tre_ast_node_t * +tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); + +tre_ast_node_t * +tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, + tre_ast_node_t *right); + +#ifdef TRE_DEBUG +void +tre_ast_print(tre_ast_node_t *tree); + +/* XXX - rethink AST printing API */ +void +tre_print_params(int *params); +#endif /* TRE_DEBUG */ + +#endif /* TRE_AST_H */ + +/* EOF */ |