diff options
Diffstat (limited to 'lib/tre-match-approx.c')
-rw-r--r-- | lib/tre-match-approx.c | 812 |
1 files changed, 812 insertions, 0 deletions
diff --git a/lib/tre-match-approx.c b/lib/tre-match-approx.c new file mode 100644 index 0000000000000..74ea330689053 --- /dev/null +++ b/lib/tre-match-approx.c @@ -0,0 +1,812 @@ +/* + tre-match-approx.c - TRE approximate regex matching engine + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +/* AIX requires this to be the first thing in the file. */ +#ifdef TRE_USE_ALLOCA +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#define __USE_STRING_INLINES +#undef __NO_INLINE__ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include <ctype.h> +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ + +#include "tre-internal.h" +#include "tre-match-utils.h" +#include "tre.h" +#include "xmalloc.h" + +#define TRE_M_COST 0 +#define TRE_M_NUM_INS 1 +#define TRE_M_NUM_DEL 2 +#define TRE_M_NUM_SUBST 3 +#define TRE_M_NUM_ERR 4 +#define TRE_M_LAST 5 + +#define TRE_M_MAX_DEPTH 3 + +typedef struct { + /* State in the TNFA transition table. */ + tre_tnfa_transition_t *state; + /* Position in input string. */ + int pos; + /* Tag values. */ + int *tags; + /* Matching parameters. */ + regaparams_t params; + /* Nesting depth of parameters. This is used as an index in + the `costs' array. */ + int depth; + /* Costs and counter values for different parameter nesting depths. */ + int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST]; +} tre_tnfa_approx_reach_t; + + +#ifdef TRE_DEBUG +/* Prints the `reach' array in a readable fashion with DPRINT. */ +static void +tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach, + int pos, int num_tags) +{ + int id; + + /* Print each state on one line. */ + DPRINT((" reach:\n")); + for (id = 0; id < tnfa->num_states; id++) + { + int i, j; + if (reach[id].pos < pos) + continue; /* Not reached. */ + DPRINT((" %03d, costs ", id)); + for (i = 0; i <= reach[id].depth; i++) + { + DPRINT(("[")); + for (j = 0; j < TRE_M_LAST; j++) + { + DPRINT(("%2d", reach[id].costs[i][j])); + if (j + 1 < TRE_M_LAST) + DPRINT((",")); + } + DPRINT(("]")); + if (i + 1 <= reach[id].depth) + DPRINT((", ")); + } + DPRINT(("\n tags ")); + for (i = 0; i < num_tags; i++) + { + DPRINT(("%02d", reach[id].tags[i])); + if (i + 1 < num_tags) + DPRINT((",")); + } + DPRINT(("\n")); + } + DPRINT(("\n")); +} +#endif /* TRE_DEBUG */ + + +/* Sets the matching parameters in `reach' to the ones defined in the `pa' + array. If `pa' specifies default values, they are taken from + `default_params'. */ +inline static void +tre_set_params(tre_tnfa_approx_reach_t *reach, + int *pa, regaparams_t default_params) +{ + int value; + + /* If depth is increased reset costs and counters to zero for the + new levels. */ + value = pa[TRE_PARAM_DEPTH]; + assert(value <= TRE_M_MAX_DEPTH); + if (value > reach->depth) + { + int i, j; + for (i = reach->depth + 1; i <= value; i++) + for (j = 0; j < TRE_M_LAST; j++) + reach->costs[i][j] = 0; + } + reach->depth = value; + + /* Set insert cost. */ + value = pa[TRE_PARAM_COST_INS]; + if (value == TRE_PARAM_DEFAULT) + reach->params.cost_ins = default_params.cost_ins; + else if (value != TRE_PARAM_UNSET) + reach->params.cost_ins = value; + + /* Set delete cost. */ + value = pa[TRE_PARAM_COST_DEL]; + if (value == TRE_PARAM_DEFAULT) + reach->params.cost_del = default_params.cost_del; + else if (value != TRE_PARAM_UNSET) + reach->params.cost_del = value; + + /* Set substitute cost. */ + value = pa[TRE_PARAM_COST_SUBST]; + if (value == TRE_PARAM_DEFAULT) + reach->params.cost_subst = default_params.cost_subst; + else + reach->params.cost_subst = value; + + /* Set maximum cost. */ + value = pa[TRE_PARAM_COST_MAX]; + if (value == TRE_PARAM_DEFAULT) + reach->params.max_cost = default_params.max_cost; + else if (value != TRE_PARAM_UNSET) + reach->params.max_cost = value; + + /* Set maximum inserts. */ + value = pa[TRE_PARAM_MAX_INS]; + if (value == TRE_PARAM_DEFAULT) + reach->params.max_ins = default_params.max_ins; + else if (value != TRE_PARAM_UNSET) + reach->params.max_ins = value; + + /* Set maximum deletes. */ + value = pa[TRE_PARAM_MAX_DEL]; + if (value == TRE_PARAM_DEFAULT) + reach->params.max_del = default_params.max_del; + else if (value != TRE_PARAM_UNSET) + reach->params.max_del = value; + + /* Set maximum substitutes. */ + value = pa[TRE_PARAM_MAX_SUBST]; + if (value == TRE_PARAM_DEFAULT) + reach->params.max_subst = default_params.max_subst; + else if (value != TRE_PARAM_UNSET) + reach->params.max_subst = value; + + /* Set maximum number of errors. */ + value = pa[TRE_PARAM_MAX_ERR]; + if (value == TRE_PARAM_DEFAULT) + reach->params.max_err = default_params.max_err; + else if (value != TRE_PARAM_UNSET) + reach->params.max_err = value; +} + +reg_errcode_t +tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, + tre_str_type_t type, int *match_tags, + regamatch_t *match, regaparams_t default_params, + int eflags, int *match_end_ofs) +{ + /* State variables required by GET_NEXT_WCHAR. */ + tre_char_t prev_c = 0, next_c = 0; + const char *str_byte = string; + int pos = -1; + unsigned int pos_add_next = 1; +#ifdef TRE_WCHAR + const wchar_t *str_wide = string; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* !TRE_WCHAR */ +#endif /* TRE_WCHAR */ + int reg_notbol = eflags & REG_NOTBOL; + int reg_noteol = eflags & REG_NOTEOL; + int reg_newline = tnfa->cflags & REG_NEWLINE; + int str_user_end = 0; + + int prev_pos; + + /* Number of tags. */ + int num_tags; + /* The reach tables. */ + tre_tnfa_approx_reach_t *reach, *reach_next; + /* Tag array for temporary use. */ + int *tmp_tags; + + /* End offset of best match so far, or -1 if no match found yet. */ + int match_eo = -1; + /* Costs of the match. */ + int match_costs[TRE_M_LAST]; + + /* Space for temporary data required for matching. */ + unsigned char *buf; + + int i, id; + + if (!match_tags) + num_tags = 0; + else + num_tags = tnfa->num_tags; + +#ifdef TRE_MBSTATE + memset(&mbstate, '\0', sizeof(mbstate)); +#endif /* TRE_MBSTATE */ + + DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, " + "match_tags %p\n", + type, len, eflags, + match_tags)); + DPRINT(("max cost %d, ins %d, del %d, subst %d\n", + default_params.max_cost, + default_params.cost_ins, + default_params.cost_del, + default_params.cost_subst)); + + /* Allocate memory for temporary data required for matching. This needs to + be done for every matching operation to be thread safe. This allocates + everything in a single large block from the stack frame using alloca() + or with malloc() if alloca is unavailable. */ + { + unsigned char *buf_cursor; + /* Space needed for one array of tags. */ + int tag_bytes = sizeof(*tmp_tags) * num_tags; + /* Space needed for one reach table. */ + int reach_bytes = sizeof(*reach_next) * tnfa->num_states; + /* Total space needed. */ + int total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes; + /* Add some extra to make sure we can align the pointers. The multiplier + used here must be equal to the number of ALIGN calls below. */ + total_bytes += (sizeof(long) - 1) * 3; + + /* Allocate the memory. */ +#ifdef TRE_USE_ALLOCA + buf = alloca(total_bytes); +#else /* !TRE_USE_ALLOCA */ + buf = xmalloc((unsigned)total_bytes); +#endif /* !TRE_USE_ALLOCA */ + if (!buf) + return REG_ESPACE; + memset(buf, 0, (size_t)total_bytes); + + /* Allocate `tmp_tags' from `buf'. */ + tmp_tags = (void *)buf; + buf_cursor = buf + tag_bytes; + buf_cursor += ALIGN(buf_cursor, long); + + /* Allocate `reach' from `buf'. */ + reach = (void *)buf_cursor; + buf_cursor += reach_bytes; + buf_cursor += ALIGN(buf_cursor, long); + + /* Allocate `reach_next' from `buf'. */ + reach_next = (void *)buf_cursor; + buf_cursor += reach_bytes; + buf_cursor += ALIGN(buf_cursor, long); + + /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */ + for (i = 0; i < tnfa->num_states; i++) + { + reach[i].tags = (void *)buf_cursor; + buf_cursor += tag_bytes; + reach_next[i].tags = (void *)buf_cursor; + buf_cursor += tag_bytes; + } + assert(buf_cursor <= buf + total_bytes); + } + + for (i = 0; i < TRE_M_LAST; i++) + match_costs[i] = INT_MAX; + + /* Mark the reach arrays empty. */ + for (i = 0; i < tnfa->num_states; i++) + reach[i].pos = reach_next[i].pos = -2; + + prev_pos = pos; + GET_NEXT_WCHAR(); + pos = 0; + + while (/*CONSTCOND*/1) + { + DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c)); + + /* Add initial states to `reach_next' if an exact match has not yet + been found. */ + if (match_costs[TRE_M_COST] > 0) + { + tre_tnfa_transition_t *trans; + DPRINT((" init")); + for (trans = tnfa->initial; trans->state; trans++) + { + int stateid = trans->state_id; + + /* If this state is not currently in `reach_next', add it + there. */ + if (reach_next[stateid].pos < pos) + { + if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) + { + /* Assertions failed, don't add this state. */ + DPRINT((" !%d (assert)", stateid)); + continue; + } + DPRINT((" %d", stateid)); + reach_next[stateid].state = trans->state; + reach_next[stateid].pos = pos; + + /* Compute tag values after this transition. */ + for (i = 0; i < num_tags; i++) + reach_next[stateid].tags[i] = -1; + + if (trans->tags) + for (i = 0; trans->tags[i] >= 0; i++) + if (trans->tags[i] < num_tags) + reach_next[stateid].tags[trans->tags[i]] = pos; + + /* Set the parameters, depth, and costs. */ + reach_next[stateid].params = default_params; + reach_next[stateid].depth = 0; + for (i = 0; i < TRE_M_LAST; i++) + reach_next[stateid].costs[0][i] = 0; + if (trans->params) + tre_set_params(&reach_next[stateid], trans->params, + default_params); + + /* If this is the final state, mark the exact match. */ + if (trans->state == tnfa->final) + { + match_eo = pos; + for (i = 0; i < num_tags; i++) + match_tags[i] = reach_next[stateid].tags[i]; + for (i = 0; i < TRE_M_LAST; i++) + match_costs[i] = 0; + } + } + } + DPRINT(("\n")); + } + + + /* Handle inserts. This is done by pretending there's an epsilon + transition from each state in `reach' back to the same state. + We don't need to worry about the final state here; this will never + give a better match than what we already have. */ + for (id = 0; id < tnfa->num_states; id++) + { + int depth; + int cost, cost0; + + if (reach[id].pos != prev_pos) + { + DPRINT((" insert: %d not reached\n", id)); + continue; /* Not reached. */ + } + + depth = reach[id].depth; + + /* Compute and check cost at current depth. */ + cost = reach[id].costs[depth][TRE_M_COST]; + if (reach[id].params.cost_ins != TRE_PARAM_UNSET) + cost += reach[id].params.cost_ins; + if (cost > reach[id].params.max_cost) + continue; /* Cost too large. */ + + /* Check number of inserts at current depth. */ + if (reach[id].costs[depth][TRE_M_NUM_INS] + 1 + > reach[id].params.max_ins) + continue; /* Too many inserts. */ + + /* Check total number of errors at current depth. */ + if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 + > reach[id].params.max_err) + continue; /* Too many errors. */ + + /* Compute overall cost. */ + cost0 = cost; + if (depth > 0) + { + cost0 = reach[id].costs[0][TRE_M_COST]; + if (reach[id].params.cost_ins != TRE_PARAM_UNSET) + cost0 += reach[id].params.cost_ins; + else + cost0 += default_params.cost_ins; + } + + DPRINT((" insert: from %d to %d, cost %d: ", id, id, + reach[id].costs[depth][TRE_M_COST])); + if (reach_next[id].pos == pos + && (cost0 >= reach_next[id].costs[0][TRE_M_COST])) + { + DPRINT(("lose\n")); + continue; + } + DPRINT(("win\n")); + + /* Copy state, position, tags, parameters, and depth. */ + reach_next[id].state = reach[id].state; + reach_next[id].pos = pos; + for (i = 0; i < num_tags; i++) + reach_next[id].tags[i] = reach[id].tags[i]; + reach_next[id].params = reach[id].params; + reach_next[id].depth = reach[id].depth; + + /* Set the costs after this transition. */ + memcpy(reach_next[id].costs, reach[id].costs, + sizeof(reach_next[id].costs[0][0]) + * TRE_M_LAST * (depth + 1)); + reach_next[id].costs[depth][TRE_M_COST] = cost; + reach_next[id].costs[depth][TRE_M_NUM_INS]++; + reach_next[id].costs[depth][TRE_M_NUM_ERR]++; + if (depth > 0) + { + reach_next[id].costs[0][TRE_M_COST] = cost0; + reach_next[id].costs[0][TRE_M_NUM_INS]++; + reach_next[id].costs[0][TRE_M_NUM_ERR]++; + } + + } + + + /* Handle deletes. This is done by traversing through the whole TNFA + pretending that all transitions are epsilon transitions, until + no more states can be reached with better costs. */ + { + /* XXX - dynamic ringbuffer size */ + tre_tnfa_approx_reach_t *ringbuffer[512]; + tre_tnfa_approx_reach_t **deque_start, **deque_end; + + deque_start = deque_end = ringbuffer; + + /* Add all states in `reach_next' to the deque. */ + for (id = 0; id < tnfa->num_states; id++) + { + if (reach_next[id].pos != pos) + continue; + *deque_end = &reach_next[id]; + deque_end++; + assert(deque_end != deque_start); + } + + /* Repeat until the deque is empty. */ + while (deque_end != deque_start) + { + tre_tnfa_approx_reach_t *reach_p; + int depth; + int cost, cost0; + tre_tnfa_transition_t *trans; + + /* Pop the first item off the deque. */ + reach_p = *deque_start; + id = reach_p - reach_next; + depth = reach_p->depth; + + /* Compute cost at current depth. */ + cost = reach_p->costs[depth][TRE_M_COST]; + if (reach_p->params.cost_del != TRE_PARAM_UNSET) + cost += reach_p->params.cost_del; + + /* Check cost, number of deletes, and total number of errors + at current depth. */ + if (cost > reach_p->params.max_cost + || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1 + > reach_p->params.max_del) + || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1 + > reach_p->params.max_err)) + { + /* Too many errors or cost too large. */ + DPRINT((" delete: from %03d: cost too large\n", id)); + deque_start++; + if (deque_start >= (ringbuffer + 512)) + deque_start = ringbuffer; + continue; + } + + /* Compute overall cost. */ + cost0 = cost; + if (depth > 0) + { + cost0 = reach_p->costs[0][TRE_M_COST]; + if (reach_p->params.cost_del != TRE_PARAM_UNSET) + cost0 += reach_p->params.cost_del; + else + cost0 += default_params.cost_del; + } + + for (trans = reach_p->state; trans->state; trans++) + { + int dest_id = trans->state_id; + DPRINT((" delete: from %03d to %03d, cost %d (%d): ", + id, dest_id, cost0, reach_p->params.max_cost)); + + if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) + { + DPRINT(("assertion failed\n")); + continue; + } + + /* Compute tag values after this transition. */ + for (i = 0; i < num_tags; i++) + tmp_tags[i] = reach_p->tags[i]; + if (trans->tags) + for (i = 0; trans->tags[i] >= 0; i++) + if (trans->tags[i] < num_tags) + tmp_tags[trans->tags[i]] = pos; + + /* If another path has also reached this state, choose the one + with the smallest cost or best tags if costs are equal. */ + if (reach_next[dest_id].pos == pos + && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] + || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] + && (!match_tags + || !tre_tag_order(num_tags, + tnfa->tag_directions, + tmp_tags, + reach_next[dest_id].tags))))) + { + DPRINT(("lose, cost0 %d, have %d\n", + cost0, reach_next[dest_id].costs[0][TRE_M_COST])); + continue; + } + DPRINT(("win\n")); + + /* Set state, position, tags, parameters, depth, and costs. */ + reach_next[dest_id].state = trans->state; + reach_next[dest_id].pos = pos; + for (i = 0; i < num_tags; i++) + reach_next[dest_id].tags[i] = tmp_tags[i]; + + reach_next[dest_id].params = reach_p->params; + if (trans->params) + tre_set_params(&reach_next[dest_id], trans->params, + default_params); + + reach_next[dest_id].depth = reach_p->depth; + memcpy(&reach_next[dest_id].costs, + reach_p->costs, + sizeof(reach_p->costs[0][0]) + * TRE_M_LAST * (depth + 1)); + reach_next[dest_id].costs[depth][TRE_M_COST] = cost; + reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++; + reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++; + if (depth > 0) + { + reach_next[dest_id].costs[0][TRE_M_COST] = cost0; + reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++; + reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++; + } + + if (trans->state == tnfa->final + && (match_eo < 0 + || match_costs[TRE_M_COST] > cost0 + || (match_costs[TRE_M_COST] == cost0 + && (num_tags > 0 + && tmp_tags[0] <= match_tags[0])))) + { + DPRINT((" setting new match at %d, cost %d\n", + pos, cost0)); + match_eo = pos; + memcpy(match_costs, reach_next[dest_id].costs[0], + sizeof(match_costs[0]) * TRE_M_LAST); + for (i = 0; i < num_tags; i++) + match_tags[i] = tmp_tags[i]; + } + + /* Add to the end of the deque. */ + *deque_end = &reach_next[dest_id]; + deque_end++; + if (deque_end >= (ringbuffer + 512)) + deque_end = ringbuffer; + assert(deque_end != deque_start); + } + deque_start++; + if (deque_start >= (ringbuffer + 512)) + deque_start = ringbuffer; + } + + } + +#ifdef TRE_DEBUG + tre_print_reach(tnfa, reach_next, pos, num_tags); +#endif /* TRE_DEBUG */ + + /* Check for end of string. */ + if (len < 0) + { + if (type == STR_USER) + { + if (str_user_end) + break; + } + else if (next_c == L'\0') + break; + } + else + { + if (pos >= len) + break; + } + + prev_pos = pos; + GET_NEXT_WCHAR(); + + /* Swap `reach' and `reach_next'. */ + { + tre_tnfa_approx_reach_t *tmp; + tmp = reach; + reach = reach_next; + reach_next = tmp; + } + + /* Handle exact matches and substitutions. */ + for (id = 0; id < tnfa->num_states; id++) + { + tre_tnfa_transition_t *trans; + + if (reach[id].pos < prev_pos) + continue; /* Not reached. */ + for (trans = reach[id].state; trans->state; trans++) + { + int dest_id; + int depth; + int cost, cost0, err; + + if (trans->assertions + && (CHECK_ASSERTIONS(trans->assertions) + || CHECK_CHAR_CLASSES(trans, tnfa, eflags))) + { + DPRINT((" exact, from %d: assert failed\n", id)); + continue; + } + + depth = reach[id].depth; + dest_id = trans->state_id; + + cost = reach[id].costs[depth][TRE_M_COST]; + cost0 = reach[id].costs[0][TRE_M_COST]; + err = 0; + + if (trans->code_min > (tre_cint_t)prev_c + || trans->code_max < (tre_cint_t)prev_c) + { + /* Handle substitutes. The required character was not in + the string, so match it in place of whatever was supposed + to be there and increase costs accordingly. */ + err = 1; + + /* Compute and check cost at current depth. */ + cost = reach[id].costs[depth][TRE_M_COST]; + if (reach[id].params.cost_subst != TRE_PARAM_UNSET) + cost += reach[id].params.cost_subst; + if (cost > reach[id].params.max_cost) + continue; /* Cost too large. */ + + /* Check number of substitutes at current depth. */ + if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1 + > reach[id].params.max_subst) + continue; /* Too many substitutes. */ + + /* Check total number of errors at current depth. */ + if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 + > reach[id].params.max_err) + continue; /* Too many errors. */ + + /* Compute overall cost. */ + cost0 = cost; + if (depth > 0) + { + cost0 = reach[id].costs[0][TRE_M_COST]; + if (reach[id].params.cost_subst != TRE_PARAM_UNSET) + cost0 += reach[id].params.cost_subst; + else + cost0 += default_params.cost_subst; + } + DPRINT((" subst, from %03d to %03d, cost %d: ", + id, dest_id, cost0)); + } + else + DPRINT((" exact, from %03d to %03d, cost %d: ", + id, dest_id, cost0)); + + /* Compute tag values after this transition. */ + for (i = 0; i < num_tags; i++) + tmp_tags[i] = reach[id].tags[i]; + if (trans->tags) + for (i = 0; trans->tags[i] >= 0; i++) + if (trans->tags[i] < num_tags) + tmp_tags[trans->tags[i]] = pos; + + /* If another path has also reached this state, choose the + one with the smallest cost or best tags if costs are equal. */ + if (reach_next[dest_id].pos == pos + && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] + || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] + && !tre_tag_order(num_tags, tnfa->tag_directions, + tmp_tags, + reach_next[dest_id].tags)))) + { + DPRINT(("lose\n")); + continue; + } + DPRINT(("win %d %d\n", + reach_next[dest_id].pos, + reach_next[dest_id].costs[0][TRE_M_COST])); + + /* Set state, position, tags, and depth. */ + reach_next[dest_id].state = trans->state; + reach_next[dest_id].pos = pos; + for (i = 0; i < num_tags; i++) + reach_next[dest_id].tags[i] = tmp_tags[i]; + reach_next[dest_id].depth = reach[id].depth; + + /* Set parameters. */ + reach_next[dest_id].params = reach[id].params; + if (trans->params) + tre_set_params(&reach_next[dest_id], trans->params, + default_params); + + /* Set the costs after this transition. */ + memcpy(&reach_next[dest_id].costs, + reach[id].costs, + sizeof(reach[id].costs[0][0]) + * TRE_M_LAST * (depth + 1)); + reach_next[dest_id].costs[depth][TRE_M_COST] = cost; + reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err; + reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err; + if (depth > 0) + { + reach_next[dest_id].costs[0][TRE_M_COST] = cost0; + reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err; + reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err; + } + + if (trans->state == tnfa->final + && (match_eo < 0 + || cost0 < match_costs[TRE_M_COST] + || (cost0 == match_costs[TRE_M_COST] + && num_tags > 0 && tmp_tags[0] <= match_tags[0]))) + { + DPRINT((" setting new match at %d, cost %d\n", + pos, cost0)); + match_eo = pos; + for (i = 0; i < TRE_M_LAST; i++) + match_costs[i] = reach_next[dest_id].costs[0][i]; + for (i = 0; i < num_tags; i++) + match_tags[i] = tmp_tags[i]; + } + } + } + } + + DPRINT(("match end offset = %d, match cost = %d\n", match_eo, + match_costs[TRE_M_COST])); + +#ifndef TRE_USE_ALLOCA + if (buf) + xfree(buf); +#endif /* !TRE_USE_ALLOCA */ + + match->cost = match_costs[TRE_M_COST]; + match->num_ins = match_costs[TRE_M_NUM_INS]; + match->num_del = match_costs[TRE_M_NUM_DEL]; + match->num_subst = match_costs[TRE_M_NUM_SUBST]; + *match_end_ofs = match_eo; + + return match_eo >= 0 ? REG_OK : REG_NOMATCH; +} |