summaryrefslogtreecommitdiff
path: root/lib/tre-match-approx.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tre-match-approx.c')
-rw-r--r--lib/tre-match-approx.c812
1 files changed, 812 insertions, 0 deletions
diff --git a/lib/tre-match-approx.c b/lib/tre-match-approx.c
new file mode 100644
index 0000000000000..74ea330689053
--- /dev/null
+++ b/lib/tre-match-approx.c
@@ -0,0 +1,812 @@
+/*
+ tre-match-approx.c - TRE approximate regex matching engine
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+/* AIX requires this to be the first thing in the file. */
+#ifdef TRE_USE_ALLOCA
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#define __USE_STRING_INLINES
+#undef __NO_INLINE__
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+#define TRE_M_COST 0
+#define TRE_M_NUM_INS 1
+#define TRE_M_NUM_DEL 2
+#define TRE_M_NUM_SUBST 3
+#define TRE_M_NUM_ERR 4
+#define TRE_M_LAST 5
+
+#define TRE_M_MAX_DEPTH 3
+
+typedef struct {
+ /* State in the TNFA transition table. */
+ tre_tnfa_transition_t *state;
+ /* Position in input string. */
+ int pos;
+ /* Tag values. */
+ int *tags;
+ /* Matching parameters. */
+ regaparams_t params;
+ /* Nesting depth of parameters. This is used as an index in
+ the `costs' array. */
+ int depth;
+ /* Costs and counter values for different parameter nesting depths. */
+ int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
+} tre_tnfa_approx_reach_t;
+
+
+#ifdef TRE_DEBUG
+/* Prints the `reach' array in a readable fashion with DPRINT. */
+static void
+tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
+ int pos, int num_tags)
+{
+ int id;
+
+ /* Print each state on one line. */
+ DPRINT((" reach:\n"));
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ int i, j;
+ if (reach[id].pos < pos)
+ continue; /* Not reached. */
+ DPRINT((" %03d, costs ", id));
+ for (i = 0; i <= reach[id].depth; i++)
+ {
+ DPRINT(("["));
+ for (j = 0; j < TRE_M_LAST; j++)
+ {
+ DPRINT(("%2d", reach[id].costs[i][j]));
+ if (j + 1 < TRE_M_LAST)
+ DPRINT((","));
+ }
+ DPRINT(("]"));
+ if (i + 1 <= reach[id].depth)
+ DPRINT((", "));
+ }
+ DPRINT(("\n tags "));
+ for (i = 0; i < num_tags; i++)
+ {
+ DPRINT(("%02d", reach[id].tags[i]));
+ if (i + 1 < num_tags)
+ DPRINT((","));
+ }
+ DPRINT(("\n"));
+ }
+ DPRINT(("\n"));
+}
+#endif /* TRE_DEBUG */
+
+
+/* Sets the matching parameters in `reach' to the ones defined in the `pa'
+ array. If `pa' specifies default values, they are taken from
+ `default_params'. */
+inline static void
+tre_set_params(tre_tnfa_approx_reach_t *reach,
+ int *pa, regaparams_t default_params)
+{
+ int value;
+
+ /* If depth is increased reset costs and counters to zero for the
+ new levels. */
+ value = pa[TRE_PARAM_DEPTH];
+ assert(value <= TRE_M_MAX_DEPTH);
+ if (value > reach->depth)
+ {
+ int i, j;
+ for (i = reach->depth + 1; i <= value; i++)
+ for (j = 0; j < TRE_M_LAST; j++)
+ reach->costs[i][j] = 0;
+ }
+ reach->depth = value;
+
+ /* Set insert cost. */
+ value = pa[TRE_PARAM_COST_INS];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_ins = default_params.cost_ins;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.cost_ins = value;
+
+ /* Set delete cost. */
+ value = pa[TRE_PARAM_COST_DEL];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_del = default_params.cost_del;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.cost_del = value;
+
+ /* Set substitute cost. */
+ value = pa[TRE_PARAM_COST_SUBST];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.cost_subst = default_params.cost_subst;
+ else
+ reach->params.cost_subst = value;
+
+ /* Set maximum cost. */
+ value = pa[TRE_PARAM_COST_MAX];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_cost = default_params.max_cost;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_cost = value;
+
+ /* Set maximum inserts. */
+ value = pa[TRE_PARAM_MAX_INS];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_ins = default_params.max_ins;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_ins = value;
+
+ /* Set maximum deletes. */
+ value = pa[TRE_PARAM_MAX_DEL];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_del = default_params.max_del;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_del = value;
+
+ /* Set maximum substitutes. */
+ value = pa[TRE_PARAM_MAX_SUBST];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_subst = default_params.max_subst;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_subst = value;
+
+ /* Set maximum number of errors. */
+ value = pa[TRE_PARAM_MAX_ERR];
+ if (value == TRE_PARAM_DEFAULT)
+ reach->params.max_err = default_params.max_err;
+ else if (value != TRE_PARAM_UNSET)
+ reach->params.max_err = value;
+}
+
+reg_errcode_t
+tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags,
+ regamatch_t *match, regaparams_t default_params,
+ int eflags, int *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ int pos = -1;
+ unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* !TRE_WCHAR */
+#endif /* TRE_WCHAR */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ int str_user_end = 0;
+
+ int prev_pos;
+
+ /* Number of tags. */
+ int num_tags;
+ /* The reach tables. */
+ tre_tnfa_approx_reach_t *reach, *reach_next;
+ /* Tag array for temporary use. */
+ int *tmp_tags;
+
+ /* End offset of best match so far, or -1 if no match found yet. */
+ int match_eo = -1;
+ /* Costs of the match. */
+ int match_costs[TRE_M_LAST];
+
+ /* Space for temporary data required for matching. */
+ unsigned char *buf;
+
+ int i, id;
+
+ if (!match_tags)
+ num_tags = 0;
+ else
+ num_tags = tnfa->num_tags;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
+ "match_tags %p\n",
+ type, len, eflags,
+ match_tags));
+ DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
+ default_params.max_cost,
+ default_params.cost_ins,
+ default_params.cost_del,
+ default_params.cost_subst));
+
+ /* Allocate memory for temporary data required for matching. This needs to
+ be done for every matching operation to be thread safe. This allocates
+ everything in a single large block from the stack frame using alloca()
+ or with malloc() if alloca is unavailable. */
+ {
+ unsigned char *buf_cursor;
+ /* Space needed for one array of tags. */
+ int tag_bytes = sizeof(*tmp_tags) * num_tags;
+ /* Space needed for one reach table. */
+ int reach_bytes = sizeof(*reach_next) * tnfa->num_states;
+ /* Total space needed. */
+ int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
+ /* Add some extra to make sure we can align the pointers. The multiplier
+ used here must be equal to the number of ALIGN calls below. */
+ total_bytes += (sizeof(long) - 1) * 3;
+
+ /* Allocate the memory. */
+#ifdef TRE_USE_ALLOCA
+ buf = alloca(total_bytes);
+#else /* !TRE_USE_ALLOCA */
+ buf = xmalloc((unsigned)total_bytes);
+#endif /* !TRE_USE_ALLOCA */
+ if (!buf)
+ return REG_ESPACE;
+ memset(buf, 0, (size_t)total_bytes);
+
+ /* Allocate `tmp_tags' from `buf'. */
+ tmp_tags = (void *)buf;
+ buf_cursor = buf + tag_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate `reach' from `buf'. */
+ reach = (void *)buf_cursor;
+ buf_cursor += reach_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate `reach_next' from `buf'. */
+ reach_next = (void *)buf_cursor;
+ buf_cursor += reach_bytes;
+ buf_cursor += ALIGN(buf_cursor, long);
+
+ /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
+ for (i = 0; i < tnfa->num_states; i++)
+ {
+ reach[i].tags = (void *)buf_cursor;
+ buf_cursor += tag_bytes;
+ reach_next[i].tags = (void *)buf_cursor;
+ buf_cursor += tag_bytes;
+ }
+ assert(buf_cursor <= buf + total_bytes);
+ }
+
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = INT_MAX;
+
+ /* Mark the reach arrays empty. */
+ for (i = 0; i < tnfa->num_states; i++)
+ reach[i].pos = reach_next[i].pos = -2;
+
+ prev_pos = pos;
+ GET_NEXT_WCHAR();
+ pos = 0;
+
+ while (/*CONSTCOND*/1)
+ {
+ DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
+
+ /* Add initial states to `reach_next' if an exact match has not yet
+ been found. */
+ if (match_costs[TRE_M_COST] > 0)
+ {
+ tre_tnfa_transition_t *trans;
+ DPRINT((" init"));
+ for (trans = tnfa->initial; trans->state; trans++)
+ {
+ int stateid = trans->state_id;
+
+ /* If this state is not currently in `reach_next', add it
+ there. */
+ if (reach_next[stateid].pos < pos)
+ {
+ if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
+ {
+ /* Assertions failed, don't add this state. */
+ DPRINT((" !%d (assert)", stateid));
+ continue;
+ }
+ DPRINT((" %d", stateid));
+ reach_next[stateid].state = trans->state;
+ reach_next[stateid].pos = pos;
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ reach_next[stateid].tags[i] = -1;
+
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ reach_next[stateid].tags[trans->tags[i]] = pos;
+
+ /* Set the parameters, depth, and costs. */
+ reach_next[stateid].params = default_params;
+ reach_next[stateid].depth = 0;
+ for (i = 0; i < TRE_M_LAST; i++)
+ reach_next[stateid].costs[0][i] = 0;
+ if (trans->params)
+ tre_set_params(&reach_next[stateid], trans->params,
+ default_params);
+
+ /* If this is the final state, mark the exact match. */
+ if (trans->state == tnfa->final)
+ {
+ match_eo = pos;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next[stateid].tags[i];
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = 0;
+ }
+ }
+ }
+ DPRINT(("\n"));
+ }
+
+
+ /* Handle inserts. This is done by pretending there's an epsilon
+ transition from each state in `reach' back to the same state.
+ We don't need to worry about the final state here; this will never
+ give a better match than what we already have. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ int depth;
+ int cost, cost0;
+
+ if (reach[id].pos != prev_pos)
+ {
+ DPRINT((" insert: %d not reached\n", id));
+ continue; /* Not reached. */
+ }
+
+ depth = reach[id].depth;
+
+ /* Compute and check cost at current depth. */
+ cost = reach[id].costs[depth][TRE_M_COST];
+ if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
+ cost += reach[id].params.cost_ins;
+ if (cost > reach[id].params.max_cost)
+ continue; /* Cost too large. */
+
+ /* Check number of inserts at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
+ > reach[id].params.max_ins)
+ continue; /* Too many inserts. */
+
+ /* Check total number of errors at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
+ > reach[id].params.max_err)
+ continue; /* Too many errors. */
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
+ cost0 += reach[id].params.cost_ins;
+ else
+ cost0 += default_params.cost_ins;
+ }
+
+ DPRINT((" insert: from %d to %d, cost %d: ", id, id,
+ reach[id].costs[depth][TRE_M_COST]));
+ if (reach_next[id].pos == pos
+ && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
+ {
+ DPRINT(("lose\n"));
+ continue;
+ }
+ DPRINT(("win\n"));
+
+ /* Copy state, position, tags, parameters, and depth. */
+ reach_next[id].state = reach[id].state;
+ reach_next[id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[id].tags[i] = reach[id].tags[i];
+ reach_next[id].params = reach[id].params;
+ reach_next[id].depth = reach[id].depth;
+
+ /* Set the costs after this transition. */
+ memcpy(reach_next[id].costs, reach[id].costs,
+ sizeof(reach_next[id].costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[id].costs[depth][TRE_M_COST] = cost;
+ reach_next[id].costs[depth][TRE_M_NUM_INS]++;
+ reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
+ if (depth > 0)
+ {
+ reach_next[id].costs[0][TRE_M_COST] = cost0;
+ reach_next[id].costs[0][TRE_M_NUM_INS]++;
+ reach_next[id].costs[0][TRE_M_NUM_ERR]++;
+ }
+
+ }
+
+
+ /* Handle deletes. This is done by traversing through the whole TNFA
+ pretending that all transitions are epsilon transitions, until
+ no more states can be reached with better costs. */
+ {
+ /* XXX - dynamic ringbuffer size */
+ tre_tnfa_approx_reach_t *ringbuffer[512];
+ tre_tnfa_approx_reach_t **deque_start, **deque_end;
+
+ deque_start = deque_end = ringbuffer;
+
+ /* Add all states in `reach_next' to the deque. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ if (reach_next[id].pos != pos)
+ continue;
+ *deque_end = &reach_next[id];
+ deque_end++;
+ assert(deque_end != deque_start);
+ }
+
+ /* Repeat until the deque is empty. */
+ while (deque_end != deque_start)
+ {
+ tre_tnfa_approx_reach_t *reach_p;
+ int depth;
+ int cost, cost0;
+ tre_tnfa_transition_t *trans;
+
+ /* Pop the first item off the deque. */
+ reach_p = *deque_start;
+ id = reach_p - reach_next;
+ depth = reach_p->depth;
+
+ /* Compute cost at current depth. */
+ cost = reach_p->costs[depth][TRE_M_COST];
+ if (reach_p->params.cost_del != TRE_PARAM_UNSET)
+ cost += reach_p->params.cost_del;
+
+ /* Check cost, number of deletes, and total number of errors
+ at current depth. */
+ if (cost > reach_p->params.max_cost
+ || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
+ > reach_p->params.max_del)
+ || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
+ > reach_p->params.max_err))
+ {
+ /* Too many errors or cost too large. */
+ DPRINT((" delete: from %03d: cost too large\n", id));
+ deque_start++;
+ if (deque_start >= (ringbuffer + 512))
+ deque_start = ringbuffer;
+ continue;
+ }
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach_p->costs[0][TRE_M_COST];
+ if (reach_p->params.cost_del != TRE_PARAM_UNSET)
+ cost0 += reach_p->params.cost_del;
+ else
+ cost0 += default_params.cost_del;
+ }
+
+ for (trans = reach_p->state; trans->state; trans++)
+ {
+ int dest_id = trans->state_id;
+ DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
+ id, dest_id, cost0, reach_p->params.max_cost));
+
+ if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
+ {
+ DPRINT(("assertion failed\n"));
+ continue;
+ }
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach_p->tags[i];
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ tmp_tags[trans->tags[i]] = pos;
+
+ /* If another path has also reached this state, choose the one
+ with the smallest cost or best tags if costs are equal. */
+ if (reach_next[dest_id].pos == pos
+ && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
+ || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
+ && (!match_tags
+ || !tre_tag_order(num_tags,
+ tnfa->tag_directions,
+ tmp_tags,
+ reach_next[dest_id].tags)))))
+ {
+ DPRINT(("lose, cost0 %d, have %d\n",
+ cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
+ continue;
+ }
+ DPRINT(("win\n"));
+
+ /* Set state, position, tags, parameters, depth, and costs. */
+ reach_next[dest_id].state = trans->state;
+ reach_next[dest_id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[dest_id].tags[i] = tmp_tags[i];
+
+ reach_next[dest_id].params = reach_p->params;
+ if (trans->params)
+ tre_set_params(&reach_next[dest_id], trans->params,
+ default_params);
+
+ reach_next[dest_id].depth = reach_p->depth;
+ memcpy(&reach_next[dest_id].costs,
+ reach_p->costs,
+ sizeof(reach_p->costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
+ if (depth > 0)
+ {
+ reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
+ reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
+ reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
+ }
+
+ if (trans->state == tnfa->final
+ && (match_eo < 0
+ || match_costs[TRE_M_COST] > cost0
+ || (match_costs[TRE_M_COST] == cost0
+ && (num_tags > 0
+ && tmp_tags[0] <= match_tags[0]))))
+ {
+ DPRINT((" setting new match at %d, cost %d\n",
+ pos, cost0));
+ match_eo = pos;
+ memcpy(match_costs, reach_next[dest_id].costs[0],
+ sizeof(match_costs[0]) * TRE_M_LAST);
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+
+ /* Add to the end of the deque. */
+ *deque_end = &reach_next[dest_id];
+ deque_end++;
+ if (deque_end >= (ringbuffer + 512))
+ deque_end = ringbuffer;
+ assert(deque_end != deque_start);
+ }
+ deque_start++;
+ if (deque_start >= (ringbuffer + 512))
+ deque_start = ringbuffer;
+ }
+
+ }
+
+#ifdef TRE_DEBUG
+ tre_print_reach(tnfa, reach_next, pos, num_tags);
+#endif /* TRE_DEBUG */
+
+ /* Check for end of string. */
+ if (len < 0)
+ {
+ if (type == STR_USER)
+ {
+ if (str_user_end)
+ break;
+ }
+ else if (next_c == L'\0')
+ break;
+ }
+ else
+ {
+ if (pos >= len)
+ break;
+ }
+
+ prev_pos = pos;
+ GET_NEXT_WCHAR();
+
+ /* Swap `reach' and `reach_next'. */
+ {
+ tre_tnfa_approx_reach_t *tmp;
+ tmp = reach;
+ reach = reach_next;
+ reach_next = tmp;
+ }
+
+ /* Handle exact matches and substitutions. */
+ for (id = 0; id < tnfa->num_states; id++)
+ {
+ tre_tnfa_transition_t *trans;
+
+ if (reach[id].pos < prev_pos)
+ continue; /* Not reached. */
+ for (trans = reach[id].state; trans->state; trans++)
+ {
+ int dest_id;
+ int depth;
+ int cost, cost0, err;
+
+ if (trans->assertions
+ && (CHECK_ASSERTIONS(trans->assertions)
+ || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
+ {
+ DPRINT((" exact, from %d: assert failed\n", id));
+ continue;
+ }
+
+ depth = reach[id].depth;
+ dest_id = trans->state_id;
+
+ cost = reach[id].costs[depth][TRE_M_COST];
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ err = 0;
+
+ if (trans->code_min > (tre_cint_t)prev_c
+ || trans->code_max < (tre_cint_t)prev_c)
+ {
+ /* Handle substitutes. The required character was not in
+ the string, so match it in place of whatever was supposed
+ to be there and increase costs accordingly. */
+ err = 1;
+
+ /* Compute and check cost at current depth. */
+ cost = reach[id].costs[depth][TRE_M_COST];
+ if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
+ cost += reach[id].params.cost_subst;
+ if (cost > reach[id].params.max_cost)
+ continue; /* Cost too large. */
+
+ /* Check number of substitutes at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
+ > reach[id].params.max_subst)
+ continue; /* Too many substitutes. */
+
+ /* Check total number of errors at current depth. */
+ if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
+ > reach[id].params.max_err)
+ continue; /* Too many errors. */
+
+ /* Compute overall cost. */
+ cost0 = cost;
+ if (depth > 0)
+ {
+ cost0 = reach[id].costs[0][TRE_M_COST];
+ if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
+ cost0 += reach[id].params.cost_subst;
+ else
+ cost0 += default_params.cost_subst;
+ }
+ DPRINT((" subst, from %03d to %03d, cost %d: ",
+ id, dest_id, cost0));
+ }
+ else
+ DPRINT((" exact, from %03d to %03d, cost %d: ",
+ id, dest_id, cost0));
+
+ /* Compute tag values after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach[id].tags[i];
+ if (trans->tags)
+ for (i = 0; trans->tags[i] >= 0; i++)
+ if (trans->tags[i] < num_tags)
+ tmp_tags[trans->tags[i]] = pos;
+
+ /* If another path has also reached this state, choose the
+ one with the smallest cost or best tags if costs are equal. */
+ if (reach_next[dest_id].pos == pos
+ && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
+ || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
+ && !tre_tag_order(num_tags, tnfa->tag_directions,
+ tmp_tags,
+ reach_next[dest_id].tags))))
+ {
+ DPRINT(("lose\n"));
+ continue;
+ }
+ DPRINT(("win %d %d\n",
+ reach_next[dest_id].pos,
+ reach_next[dest_id].costs[0][TRE_M_COST]));
+
+ /* Set state, position, tags, and depth. */
+ reach_next[dest_id].state = trans->state;
+ reach_next[dest_id].pos = pos;
+ for (i = 0; i < num_tags; i++)
+ reach_next[dest_id].tags[i] = tmp_tags[i];
+ reach_next[dest_id].depth = reach[id].depth;
+
+ /* Set parameters. */
+ reach_next[dest_id].params = reach[id].params;
+ if (trans->params)
+ tre_set_params(&reach_next[dest_id], trans->params,
+ default_params);
+
+ /* Set the costs after this transition. */
+ memcpy(&reach_next[dest_id].costs,
+ reach[id].costs,
+ sizeof(reach[id].costs[0][0])
+ * TRE_M_LAST * (depth + 1));
+ reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
+ reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
+ if (depth > 0)
+ {
+ reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
+ reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
+ reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
+ }
+
+ if (trans->state == tnfa->final
+ && (match_eo < 0
+ || cost0 < match_costs[TRE_M_COST]
+ || (cost0 == match_costs[TRE_M_COST]
+ && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
+ {
+ DPRINT((" setting new match at %d, cost %d\n",
+ pos, cost0));
+ match_eo = pos;
+ for (i = 0; i < TRE_M_LAST; i++)
+ match_costs[i] = reach_next[dest_id].costs[0][i];
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+ }
+ }
+ }
+
+ DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
+ match_costs[TRE_M_COST]));
+
+#ifndef TRE_USE_ALLOCA
+ if (buf)
+ xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+
+ match->cost = match_costs[TRE_M_COST];
+ match->num_ins = match_costs[TRE_M_NUM_INS];
+ match->num_del = match_costs[TRE_M_NUM_DEL];
+ match->num_subst = match_costs[TRE_M_NUM_SUBST];
+ *match_end_ofs = match_eo;
+
+ return match_eo >= 0 ? REG_OK : REG_NOMATCH;
+}