summaryrefslogtreecommitdiff
path: root/lib/tre-ast.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tre-ast.h')
-rw-r--r--lib/tre-ast.h128
1 files changed, 128 insertions, 0 deletions
diff --git a/lib/tre-ast.h b/lib/tre-ast.h
new file mode 100644
index 0000000000000..d9af3762f9acc
--- /dev/null
+++ b/lib/tre-ast.h
@@ -0,0 +1,128 @@
+/*
+ tre-ast.h - Abstract syntax tree (AST) definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_AST_H
+#define TRE_AST_H 1
+
+#include "tre-mem.h"
+#include "tre-internal.h"
+#include "tre-compile.h"
+
+/* The different AST node types. */
+typedef enum {
+ LITERAL,
+ CATENATION,
+ ITERATION,
+ UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL. */
+#define EMPTY -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG -3 /* Tag leaf. */
+#define BACKREF -4 /* Back reference leaf. */
+#define PARAMETER -5 /* Parameter. */
+
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
+#define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
+
+
+/* A generic AST node. All AST nodes consist of this node on the top
+ level with `obj' pointing to the actual content. */
+typedef struct {
+ tre_ast_type_t type; /* Type of the node. */
+ void *obj; /* Pointer to actual node. */
+ int nullable;
+ int submatch_id;
+ int num_submatches;
+ int num_tags;
+ tre_pos_and_tags_t *firstpos;
+ tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node. These are created for assertions, back references,
+ tags, matching parameter settings, and all expressions that match one
+ character. */
+typedef struct {
+ long code_min;
+ long code_max;
+ int position;
+ union {
+ tre_ctype_t class;
+ int *params;
+ } u;
+ tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node. These are created when two regexps are concatenated.
+ If there are more than one subexpressions in sequence, the `left' part
+ holds all but the last, and `right' part holds the last subexpression
+ (catenation is left associative). */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
+ operators. */
+typedef struct {
+ /* Subexpression to match. */
+ tre_ast_node_t *arg;
+ /* Minimum number of consecutive matches. */
+ int min;
+ /* Maximum number of consecutive matches. */
+ int max;
+ /* If 0, match as many characters as possible, if 1 match as few as
+ possible. Note that this does not always mean the same thing as
+ matching as many/few repetitions as possible. */
+ unsigned int minimal:1;
+ /* Approximate matching parameters (or NULL). */
+ int *params;
+} tre_iteration_t;
+
+/* An "union" node. These are created for the "|" operator. */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_union_t;
+
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+ int minimal);
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+ tre_ast_node_t *right);
+
+#ifdef TRE_DEBUG
+void
+tre_ast_print(tre_ast_node_t *tree);
+
+/* XXX - rethink AST printing API */
+void
+tre_print_params(int *params);
+#endif /* TRE_DEBUG */
+
+#endif /* TRE_AST_H */
+
+/* EOF */