diff options
Diffstat (limited to 'src/keyword.cc')
-rw-r--r-- | src/keyword.cc | 161 |
1 files changed, 161 insertions, 0 deletions
diff --git a/src/keyword.cc b/src/keyword.cc new file mode 100644 index 000000000000..ae53bcca320a --- /dev/null +++ b/src/keyword.cc @@ -0,0 +1,161 @@ +/* Keyword data. + Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. + Written by Douglas C. Schmidt <schmidt@ics.uci.edu> + and Bruno Haible <bruno@clisp.org>. + + This file is part of GNU GPERF. + + GNU GPERF is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU GPERF is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Specification. */ +#include "keyword.h" + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include "positions.h" + + +/* --------------------------- KeywordExt class --------------------------- */ + +/* Sort a small set of 'unsigned int', base[0..len-1], in place. */ +static inline void sort_char_set (unsigned int *base, int len) +{ + /* Bubble sort is sufficient here. */ + for (int i = 1; i < len; i++) + { + int j; + unsigned int tmp; + + for (j = i, tmp = base[j]; j > 0 && tmp < base[j - 1]; j--) + base[j] = base[j - 1]; + + base[j] = tmp; + } +} + +/* Initializes selchars and selchars_length. + + General idea: + The hash function will be computed as + asso_values[allchars[key_pos[0]]] + + asso_values[allchars[key_pos[1]]] + ... + We compute selchars as the multiset + { allchars[key_pos[0]], allchars[key_pos[1]], ... } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... + Furthermore we sort the selchars array, to ease detection of duplicates + later. + + More in detail: The arguments alpha_unify (used for case-insensitive + hash functions) and alpha_inc (used to disambiguate permutations) + apply slight modifications. The hash function will be computed as + sum (j=0,1,...: k = key_pos[j]: + asso_values[alpha_unify[allchars[k]+alpha_inc[k]]]) + + (allchars_length if !option[NOLENGTH], 0 otherwise). + We compute selchars as the multiset + { alpha_unify[allchars[k]+alpha_inc[k]] : j=0,1,..., k = key_pos[j] } + so that the hash function becomes + asso_values[selchars[0]] + asso_values[selchars[1]] + ... + + (allchars_length if !option[NOLENGTH], 0 otherwise). + */ + +unsigned int * +KeywordExt::init_selchars_low (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +{ + /* Iterate through the list of positions, initializing selchars + (via ptr). */ + PositionIterator iter = positions.iterator(_allchars_length); + + unsigned int *key_set = new unsigned int[iter.remaining()]; + unsigned int *ptr = key_set; + + for (int i; (i = iter.next ()) != PositionIterator::EOS; ) + { + unsigned int c; + if (i == Positions::LASTCHAR) + /* Special notation for last KEY position, i.e. '$'. */ + c = static_cast<unsigned char>(_allchars[_allchars_length - 1]); + else if (i < _allchars_length) + { + /* Within range of KEY length, so we'll keep it. */ + c = static_cast<unsigned char>(_allchars[i]); + if (alpha_inc) + c += alpha_inc[i]; + } + else + /* Out of range of KEY length, the iterator should not have + produced this. */ + abort (); + if (alpha_unify) + c = alpha_unify[c]; + *ptr = c; + ptr++; + } + + _selchars = key_set; + _selchars_length = ptr - key_set; + + return key_set; +} + +void +KeywordExt::init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) +{ + init_selchars_low (positions, alpha_unify, NULL); +} + +void +KeywordExt::init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) +{ + unsigned int *selchars = + init_selchars_low (positions, alpha_unify, alpha_inc); + + /* Sort the selchars elements alphabetically. */ + sort_char_set (selchars, _selchars_length); +} + +/* Deletes selchars. */ +void +KeywordExt::delete_selchars () +{ + delete[] const_cast<unsigned int *>(_selchars); +} + + +/* ------------------------- Keyword_Factory class ------------------------- */ + +Keyword_Factory::Keyword_Factory () +{ +} + +Keyword_Factory::~Keyword_Factory () +{ +} + + +/* ------------------------------------------------------------------------- */ + +char empty_string[1] = ""; + + +#ifndef __OPTIMIZE__ + +#define INLINE /* not inline */ +#include "keyword.icc" +#undef INLINE + +#endif /* not defined __OPTIMIZE__ */ |