diff options
Diffstat (limited to 'src/hash-table.cc')
-rw-r--r-- | src/hash-table.cc | 210 |
1 files changed, 141 insertions, 69 deletions
diff --git a/src/hash-table.cc b/src/hash-table.cc index a147674b3074..d98d90a21109 100644 --- a/src/hash-table.cc +++ b/src/hash-table.cc @@ -1,95 +1,167 @@ /* Hash table for checking keyword links. Implemented using double hashing. - Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc. - written by Douglas C. Schmidt (schmidt@ics.uci.edu) + Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. + Written by Douglas C. Schmidt <schmidt@ics.uci.edu> + and Bruno Haible <bruno@clisp.org>. -This file is part of GNU GPERF. + This file is part of GNU GPERF. -GNU GPERF is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 1, or (at your option) -any later version. + GNU GPERF is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. -GNU GPERF is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + GNU GPERF is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GNU GPERF; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* Specification. */ #include "hash-table.h" #include <stdio.h> #include <string.h> /* declares memset(), strcmp() */ #include <hash.h> #include "options.h" -#include "trace.h" - -/* The size of the hash table is always the smallest power of 2 >= the size - indicated by the user. This allows several optimizations, including - the use of double hashing and elimination of the mod instruction. - Note that the size had better be larger than the number of items - in the hash table, else there's trouble!!! Note that the memory - for the hash table is allocated *outside* the intialization routine. - This compromises information hiding somewhat, but greatly reduces - memory fragmentation, since we can now use alloca! */ - -Hash_Table::Hash_Table (List_Node **table_ptr, int s, int ignore_len): - table (table_ptr), size (s), collisions (0), ignore_length (ignore_len) -{ - T (Trace t ("Hash_Table::Hash_Table");) - memset ((char *) table, 0, size * sizeof (*table)); -} -Hash_Table::~Hash_Table (void) +/* We use a hash table with double hashing. This is the simplest kind of + hash table, given that we always only insert and never remove entries + from the hash table. */ + +/* To make double hashing efficient, there need to be enough spare entries. */ +static const int size_factor = 10; + +/* We make the size of the hash table a power of 2. This allows for two + optimizations: It eliminates the modulo instruction, and allows for an + easy secondary hashing function. */ + +/* Constructor. */ +Hash_Table::Hash_Table (unsigned int size, bool ignore_length) + : _ignore_length (ignore_length), + _collisions (0) { - T (Trace t ("Hash_Table::~Hash_Table");) - if (option[DEBUG]) + /* There need to be enough spare entries. */ + size = size * size_factor; + + /* Find smallest power of 2 that is >= size. */ + unsigned int shift = 0; + if ((size >> 16) > 0) + { + size = size >> 16; + shift += 16; + } + if ((size >> 8) > 0) + { + size = size >> 8; + shift += 8; + } + if ((size >> 4) > 0) + { + size = size >> 4; + shift += 4; + } + if ((size >> 2) > 0) + { + size = size >> 2; + shift += 2; + } + if ((size >> 1) > 0) { - int field_width = option.get_max_keysig_size (); - - fprintf (stderr, - "\ndumping the hash table\n" - "total available table slots = %d, total bytes = %d, total collisions = %d\n" - "location, %*s, keyword\n", - size, size * (int) sizeof (*table), collisions, - field_width, "keysig"); - - for (int i = size - 1; i >= 0; i--) - if (table[i]) - fprintf (stderr, "%8d, %*.*s, %.*s\n", - i, - field_width, table[i]->char_set_length, table[i]->char_set, - table[i]->key_length, table[i]->key); - - fprintf (stderr, "\nend dumping hash table\n\n"); + size = size >> 1; + shift += 1; } + _log_size = shift; + _size = 1 << shift; + + /* Allocate table. */ + _table = new KeywordExt*[_size]; + memset (_table, 0, _size * sizeof (*_table)); } -/* If the ITEM is already in the hash table return the item found - in the table. Otherwise inserts the ITEM, and returns FALSE. - Uses double hashing. */ +/* Destructor. */ +Hash_Table::~Hash_Table () +{ + delete[] _table; +} -List_Node * -Hash_Table::insert (List_Node *item) +/* Print the table's contents. */ +void +Hash_Table::dump () const { - T (Trace t ("Hash_Table::operator()");) - unsigned hash_val = hashpjw (item->char_set, item->char_set_length); - int probe = hash_val & (size - 1); - int increment = ((hash_val ^ item->key_length) | 1) & (size - 1); + int field_width; + + field_width = 0; + { + for (int i = _size - 1; i >= 0; i--) + if (_table[i]) + if (field_width < _table[i]->_selchars_length) + field_width = _table[i]->_selchars_length; + } + + fprintf (stderr, + "\ndumping the hash table\n" + "total available table slots = %d, total bytes = %d, total collisions = %d\n" + "location, %*s, keyword\n", + _size, _size * static_cast<unsigned int>(sizeof (*_table)), + _collisions, field_width, "keysig"); + + for (int i = _size - 1; i >= 0; i--) + if (_table[i]) + { + fprintf (stderr, "%8d, ", i); + if (field_width > _table[i]->_selchars_length) + fprintf (stderr, "%*s", field_width - _table[i]->_selchars_length, ""); + for (int j = 0; j < _table[i]->_selchars_length; j++) + putc (_table[i]->_selchars[j], stderr); + fprintf (stderr, ", %.*s\n", + _table[i]->_allchars_length, _table[i]->_allchars); + } + + fprintf (stderr, "\nend dumping hash table\n\n"); +} - while (table[probe]) +/* Compares two items. */ +inline bool +Hash_Table::equal (KeywordExt *item1, KeywordExt *item2) const +{ + return item1->_selchars_length == item2->_selchars_length + && memcmp (item1->_selchars, item2->_selchars, + item2->_selchars_length * sizeof (unsigned int)) + == 0 + && (_ignore_length + || item1->_allchars_length == item2->_allchars_length); +} + +/* Attempts to insert ITEM in the table. If there is already an equal + entry in it, returns it. Otherwise inserts ITEM and returns NULL. */ +KeywordExt * +Hash_Table::insert (KeywordExt *item) +{ + unsigned hash_val = + hashpjw (reinterpret_cast<const unsigned char *>(item->_selchars), + item->_selchars_length * sizeof (unsigned int)); + unsigned int probe = hash_val & (_size - 1); + unsigned int increment = + (((hash_val >> _log_size) + ^ (_ignore_length ? 0 : item->_allchars_length)) + << 1) + 1; + /* Note that because _size is a power of 2 and increment is odd, + we have gcd(increment,_size) = 1, which guarantees that we'll find + an empty entry during the loop. */ + + while (_table[probe] != NULL) { - if (table[probe]->char_set_length == item->char_set_length - && memcmp (table[probe]->char_set, item->char_set, item->char_set_length) == 0 - && (ignore_length || table[probe]->key_length == item->key_length)) - return table[probe]; + if (equal (_table[probe], item)) + return _table[probe]; - collisions++; - probe = (probe + increment) & (size - 1); + _collisions++; + probe = (probe + increment) & (_size - 1); } - table[probe] = item; - return (List_Node *) 0; + _table[probe] = item; + return NULL; } |