CVC3  2.4.1
hash_table.h
Go to the documentation of this file.
1 /*****************************************************************************/
2 /*!
3  *\file hash_table.h
4  *\brief hash table implementation
5  *
6  * Author: Alexander Fuchs
7  *
8  * Created: Thu Oct 19 11:04:00 2006
9  *
10  * <hr>
11  *
12  * License to use, copy, modify, sell and/or distribute this software
13  * and its documentation for any purpose is hereby granted without
14  * royalty, subject to the terms and conditions defined in the \ref
15  * LICENSE file provided with this distribution.
16  *
17  * <hr>
18  */
19 /*****************************************************************************/
20 
21 /*
22  * Copyright (c) 1996,1997
23  * Silicon Graphics Computer Systems, Inc.
24  *
25  * Permission to use, copy, modify, distribute and sell this software
26  * and its documentation for any purpose is hereby granted without fee,
27  * provided that the above copyright notice appear in all copies and
28  * that both that copyright notice and this permission notice appear
29  * in supporting documentation. Silicon Graphics makes no
30  * representations about the suitability of this software for any
31  * purpose. It is provided "as is" without express or implied warranty.
32  *
33  *
34  * Copyright (c) 1994
35  * Hewlett-Packard Company
36  *
37  * Permission to use, copy, modify, distribute and sell this software
38  * and its documentation for any purpose is hereby granted without fee,
39  * provided that the above copyright notice appear in all copies and
40  * that both that copyright notice and this permission notice appear
41  * in supporting documentation. Hewlett-Packard Company makes no
42  * representations about the suitability of this software for any
43  * purpose. It is provided "as is" without express or implied warranty.
44  *
45  */
46 
47 // this implementation is in essence a subset of the SGI implementation:
48 // http://www.sgi.com/tech/stl/stl_hashtable.h
49 
50 #ifndef _cvc3__hash__hash_table_h_
51 #define _cvc3__hash__hash_table_h_
52 
53 #include <vector>
54 #include <string>
55 #include <functional>
56 #include <algorithm>
57 #include "hash_fun.h"
58 #include "os.h"
59 
60 // For some reason, including debug.h doesn't work--so redeclare macros here
61 
62 #ifdef _CVC3_DEBUG_MODE
63 #define DebugAssert(cond, str) if(!(cond)) \
64  CVC3::debugError(__FILE__, __LINE__, #cond, str)
65 namespace CVC3 {
66 extern CVC_DLL void debugError(const std::string& file, int line,
67  const std::string& cond, const std::string& msg);
68 }
69 #else
70 #define DebugAssert(cond, str)
71 #endif
72 
73 namespace Hash {
74  // get size_t from hash_fun, which gets it from cstddef
75  typedef size_t size_type;
76 
77  /// primes for increasing the hash table size
78 
79  // Note: assumes size_type is unsigned and at least 32 bits.
80  const size_type num_primes = 28;
81 
82  static const size_type prime_list[num_primes] = {
83  53ul, 97ul, 193ul, 389ul, 769ul,
84  1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
85  49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
86  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
87  50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
88  1610612741ul, 3221225473ul, 4294967291ul
89  };
90 
92  {
93  const size_type* first = prime_list;
94  const size_type* last = prime_list + (size_type)num_primes;
95  const size_type* pos = std::lower_bound(first, last, n);
96  return pos == last ? *(last - 1) : *pos;
97  }
98 
99 
100  /*! template to instante to hash map and hash set
101 
102  based on the sgi implementation:
103  http://www.sgi.com/tech/stl/HashedAssociativeContainer.html
104 
105  _Key: hash key type
106  _Data: key + value data to store
107  _HashFcn: functional class providing a hash function: int(_Key)
108  Note: in some STL implementations hash is already part of
109  some extension an in namespace std or stdext, in some it is not.
110  So we assume that it is not available. :TODO:
111  _EqualKey: functional class providing a comparison function: bool(_Key, _Key)
112  returns true iff two keys are considered to be equal
113  _ExtractKey: extracts key from _Data: _Key(_Data)
114  */
115  //
116  // can't use debug.h as debug.h uses hash maps...
117  //
118  // implemented as an array of lists (buckets)
119  template <class _Key, class _Value,
120  class _HashFcn, class _EqualKey, class _ExtractKey>
121  class hash_table {
122 
123  /// types
124 
125  public:
126  // interface typedefs
128  typedef _Key key_type;
129  typedef _Value value_type;
130  typedef _HashFcn hasher;
131  typedef _EqualKey key_equal;
132 
133  protected:
134  // a bucket is a list of values
135  // using an STL list makes it more complicated to have a nice end iterator,
136  // as iterators of different lists can not be compared
137  // (at least in the MS STL).
138  // so instead we implement our own single-linked list here,
139  // where NULL is the end iterator for all lists.
140  struct BucketNode {
141  BucketNode(BucketNode* next, const value_type& value)
142  : d_next(next), d_value(value)
143  { };
146  };
147  typedef BucketNode Bucket;
148 
149  // the buckets are kept in an array
150  typedef std::vector<Bucket*> Data;
151  typedef typename Data::iterator data_iter;
152  typedef typename Data::const_iterator data_const_iter;
153 
154 
155  public:
156  // iterators
157  class iterator;
158  friend class iterator;
160  friend class const_iterator;
161 
162 
163 
164  /// variables
165 
166  protected:
167  /// template parameters
168 
169  // the hash function for a key
171 
172  // the equality function between keys
174 
175  // extraction of key from data
176  _ExtractKey d_extractKey;
177 
178 
179  // the current number of elements - stored for efficiency
181 
182  // the hash table - an array of buckets
184 
185 
186  /// methods
187 
188  protected:
189 
190  /// template parameters
191 
192 
193  // the hash function for a key
194  size_type hash(const key_type& key) const {
195  return d_hash(key);
196  }
197 
198  // equality between keys
199  bool equal(const key_type& key1, const key_type& key2) const {
200  return d_equal(key1, key2);
201  }
202 
203  // extraction of a key
204  const key_type& extractKey(const value_type& value) const {
205  return d_extractKey(value);
206  }
207 
208 
209  /// bucket retrieval
210 
211  // returns the index in the array which contains the bucket
212  // with the keys mapping to the same hash value as key
213  size_type getBucketIndex(const key_type& key) const {
214  return (hash(key) % d_data.size());
215  }
216 
218  return getBucketByIndex(getBucketIndex(key));
219  }
220 
221  const Bucket* getBucketByKey(const key_type& key) const {
222  return getBucketByIndex(getBucketIndex(key));
223  }
224 
226  DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex");
227  return d_data.at(index);
228  }
229 
230  const Bucket* getBucketByIndex(const size_type index) const {
231  DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex (const)");
232  return d_data.at(index);
233  }
234 
235 
236 
237  /// resize
238 
239  // increase the size of the table, typically if the load factor is too high
240  void resize() {
241  if (load_factor() > 1) {
242  // create new table with doubled size size
243  //size_type size = 2 * d_data.size();
244  // this is simple, but might not be efficient for bad hash functions,
245  // which for example mainly hash to values which contain 2 as a factor.
246  // thus, instead we go from a prime to a prime of more or less double size.
247  size_type size = next_prime(d_data.size() + 1);
248  Data copy(size, NULL);
249 
250  // move entries to new table
251  for (size_type i = 0; i < d_data.size(); ++i) {
252  // head of current bucket to move
253  BucketNode* bucket = d_data[i];
254  while (bucket != NULL) {
255  // move head of old bucket
256  BucketNode* current = bucket;
257  bucket = bucket->d_next;
258 
259  // move old head to new bucket
260  size_type new_index = hash(extractKey(current->d_value)) % size;
261  BucketNode* new_bucket = copy[new_index];
262  current->d_next = new_bucket;
263  copy[new_index] = current;
264  }
265  d_data[i] = NULL;
266  }
267 
268  d_data.swap(copy);
269  }
270  }
271 
272 
273  public:
274  /// constructors
275 
276  // default size is 16 buckets
278  d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
279  d_size(0), d_data(16)
280  {
281  init();
282  };
283 
284  // specifiy initial number of buckets - must be positive
285  hash_table(size_type initial_capacity) :
286  d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
287  d_size(0), d_data(initial_capacity)
288  {
289  init();
290  };
291 
292  // specifiy initial number of buckets and hash function
293  hash_table(size_type initial_capacity, const _HashFcn& hash) :
294  d_hash(hash), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
295  d_size(0), d_data(initial_capacity)
296  {
297  init();
298  };
299 
300  // specifiy initial number of buckets, hash and equal function
301  hash_table(size_type initial_capacity,
302  const _HashFcn& hash, const _EqualKey& equal) :
303  d_hash(hash), d_equal(equal), d_extractKey(_ExtractKey()),
304  d_size(0), d_data(initial_capacity)
305  {
306  init();
307  };
308 
309  // copy hash table
310  hash_table(const hash_table& other) :
311  d_hash(other.d_hash), d_equal(other.d_equal), d_extractKey(other.d_extractKey),
312  d_size(other.d_size), d_data(0)
313  {
314  assignTable(other.d_data);
315  };
316 
318  clear();
319  }
320 
321  // assign hash table
322  hash_table& operator=(const hash_table& other) {
323  if (this != &other) {
324  clear();
325 
326  d_hash = other.d_hash;
327  d_equal = other.d_equal;
328  d_extractKey = other.d_extractKey;
329  d_size = other.d_size;
330  assignTable(other.d_data);
331  }
332 
333  return *this;
334  }
335 
336 
337  // replaces the current hash table with the given one
338  void assignTable(const Data& data) {
339  // copy elements:
340  // default assignment operator does not work if value_type contains
341  // constants, which should be the case as the key should be constant.
342  // so not even shrinking a vector is possible,
343  // so create a new table instead and swap with the current one.
344  Data tmp(data.size());
345  d_data.swap(tmp);
346 
347  // for each bucket ...
348  for (size_type i = 0; i < data.size(); ++i) {
349  // .. copy each element
350  DebugAssert(i < d_data.size(),"hash_table::operator=");
351 
352  // copy bucket if not empty
353  Bucket* source = data[i];
354  if (source != NULL) {
355  // set first element
356  Bucket* target = new BucketNode(NULL, source->d_value);
357  d_data[i] = target;
358  source = source->d_next;
359 
360  // copy remaining nodes
361  while (source != NULL) {
362  target->d_next = new BucketNode(NULL, source->d_value);
363  target = target->d_next;
364  source = source->d_next;
365  }
366  }
367  }
368  }
369 
370  void swap(hash_table& other) {
371  std::swap(d_hash, other.d_hash);
372  std::swap(d_equal, other.d_equal);
373  std::swap(d_extractKey, other.d_extractKey);
374  std::swap(d_size, other.d_size);
375  d_data.swap(other.d_data);
376  }
377 
378  // sets all buckets to NULL
379  void init() {
380  for (size_type i = 0; i < d_data.size(); ++i) {
381  d_data[i] = NULL;
382  }
383  }
384 
385  // empty all buckets
386  void clear() {
387  d_size = 0;
388 
389  for (size_type i = 0; i < d_data.size(); ++i) {
390  BucketNode* head = d_data[i];
391  while (head != NULL) {
392  BucketNode* next = head->d_next;
393  delete head;
394  head = next;
395  }
396  d_data[i] = NULL;
397  }
398  }
399 
400 
401 
402  /// operations
403 
404 
405  // returns end iterator if key was not bound
406  iterator find(const key_type& key) {
407  for (BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) {
408  if (equal(extractKey(node->d_value), key)) {
409  return iterator(this, node);
410  }
411  }
412  return end();
413  }
414 
415  // const version of find
416  const_iterator find(const key_type& key) const {
417  for (const BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) {
418  if (equal(extractKey(node->d_value), key)) {
419  return const_iterator(this, node);
420  }
421  }
422  return end();
423  }
424 
425 
426  // adds the mapping from key to data, if key is still unbound
427  // otherwise returns false
428  std::pair<iterator, bool> insert(const value_type& value) {
429  // resize in case we insert
430  resize();
431 
432  const key_type& key = extractKey(value);
433  size_type index = getBucketIndex(key);
434 
435  // check if key is already bound
436  for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
437  if (equal(extractKey(node->d_value), key)) {
438  return std::make_pair(end(), false);
439  }
440  }
441 
442  // insert new value
443  ++d_size;
444  d_data[index] = new BucketNode(d_data[index], value);
445  return std::make_pair(iterator(this, d_data[index]), true);
446  }
447 
448  // if key in value is already bound,
449  // returns that bindings,
450  // otherwise inserts value and returns it.
452  // resize in case we insert
453  resize();
454 
455  const key_type& key = extractKey(value);
456  size_type index = getBucketIndex(key);
457 
458  // check if key is already bound
459  for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
460  if (equal(extractKey(node->d_value), key)) {
461  return node->d_value;
462  }
463  }
464 
465  // insert new value
466  ++d_size;
467  d_data[index] = new BucketNode(d_data[index], value);
468  return d_data[index]->d_value;
469  }
470 
471 
472  // removes binding of key
473  // returns number of keys removed,
474  // i.e. 1 if key was bound, 0 if key was not bound.
475  size_type erase(const key_type& key) {
476  size_type index = getBucketIndex(key);
477 
478  // keep track of the node previous to the current one
479  BucketNode* prev = NULL;
480  for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
481  if (equal(extractKey(node->d_value), key)) {
482  --d_size;
483 
484  // remove the bucket's head
485  if (prev == NULL) {
486  d_data[index] = node->d_next;
487  }
488  // remove within the list;
489  else {
490  prev->d_next = node->d_next;
491  }
492  delete node;
493  return 1;
494  }
495 
496  prev = node;
497  }
498 
499  return 0;
500  }
501 
502  // removes element pointed to by iter,
503  // returns element after iter.
505  const_iterator next(iter);
506  ++next;
507 
508  const key_type& key = extractKey(*iter);
509  size_type index = getBucketIndex(key);
510 
511  // keep track of the node previous to the current one
512  BucketNode* prev = NULL;
513  for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
514  if (equal(extractKey(node->d_value), key)) {
515  --d_size;
516 
517  // remove the bucket's head
518  if (prev == NULL) {
519  d_data[index] = node->d_next;
520  }
521  // remove within the list;
522  else {
523  prev->d_next = node->d_next;
524  }
525  delete node;
526  return next;
527  }
528 
529  prev = node;
530  }
531 
532  return next;
533  }
534 
535 
536  /// status
537 
538  // is the key bound?
539  bool contains(const key_type& key) const {
540  return (find(key) != end());
541  }
542 
543  // returns the number of times a key is bound,
544  // i.e. 0 or 1
545  size_type count(const _Key& key) const {
546  if (contains(key)) {
547  return 1;
548  }
549  else {
550  return 0;
551  }
552  }
553 
554  // is the hash table empty?
555  bool empty() const {
556  return (d_size == 0);
557  }
558 
559  // the number of elements in the hash table
560  size_type size() const {
561  return d_size;
562  }
563 
564  // the number of buckets in the hash table
566  return d_data.size();
567  }
568 
569  // returns the average number of elements per bucket
570  float load_factor() const {
571  return (float(d_size) / float(d_data.size()));
572  }
573 
574 
575 
576  /// iterators
577 
578  // returns forward iterator to iterate over all key/data pairs
580  if (d_size > 0) {
581  // find first non-empty bucket
582  size_type index = 0;
583  while (d_data[index] == NULL) {
584  ++index;
585  }
586 
587  return iterator(this, d_data[index]);
588  }
589  else {
590  return end();
591  }
592  }
593 
594  // const version of begin
596  if (d_size > 0) {
597  // find first non-empty bucket
598  size_type index = 0;
599  while (d_data[index] == NULL) {
600  ++index;
601  }
602 
603  return const_iterator(this, d_data[index]);
604  }
605  else {
606  return end();
607  }
608  }
609 
610 
611  // returns end iterator
613  return iterator(this, NULL);
614  }
615 
616  // const version of end
617  const_iterator end() const {
618  return const_iterator(this, NULL);
619  }
620 
621 
622 
623 
624  /// inner classes
625 
626 
627 
628 
629  // iterator over hashtable elements
630 
631  // modifying the hash table leaves iterator intact,
632  // unless the key in the value it points to is modified/deleted
633  class iterator {
634  friend class hash_table;
635  friend class const_iterator;
636 
637  /// variables
638 
639  protected:
640  // the hash table of this iterator
642  // iterator points to current element in some bucket
644 
645 
646  /// methods
647  protected:
648  // used by hash_table to create an iterator
650  : d_hash_table(hash_table), d_node(node)
651  { }
652 
653  public:
654  // public default constructor,
655  // leaves the iterator in undefined state.
657  : d_hash_table(NULL), d_node(NULL)
658  { }
659 
660  // copy constructor
661  iterator(const iterator& other)
662  : d_hash_table(other.d_hash_table), d_node(other.d_node)
663  { }
664 
665  // assignment
666  iterator& operator=(const iterator& other) {
667  if (this != &other) {
668  d_hash_table = other.d_hash_table;
669  d_node = other.d_node;
670  }
671 
672  return *this;
673  }
674 
675  // go to next data - pre-increment
677  // must not be the end iterator
678  DebugAssert(d_node != NULL, "hash operator++");
679 
680  // get current bucket index
682 
683  // go to next entry in bucket
684  d_node = d_node->d_next;
685 
686  // while current bucket empty
687  while (d_node == NULL) {
688  // go to next bucket
689  ++index;
690 
691  // all buckets exhausted
692  if (index == d_hash_table->d_data.size()) {
693  // unfortunately this does not work, as end() returns a tmp value
694  // return d_hash_table->end();
695  *this = d_hash_table->end();
696  return *this;
697  }
698  DebugAssert(index < d_hash_table->d_data.size(),
699  "hash operator++ 2");
700 
702  }
703 
704  return *this;
705  };
706 
707  // go to next data - post-increment
709  iterator tmp = *this;
710  ++(*this);
711  return tmp;
712  }
713 
715  return d_node->d_value;
716  }
717 
719  return &(operator*());
720  }
721 
722  // are two iterator identical?
723  bool operator==(const iterator& other) const {
724  // if the same bucket iterator, then it must be the same hash table
725  DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table, "hash operator==");
726  return (d_node == other.d_node);
727  }
728 
729  // negation of ==
730  bool operator!=(const iterator& other) const {
731  return !(*this == other);
732  }
733  };
734 
735 
736 
737 
738  // const iterator over hashtable elements
739 
740  // modifying the hash table leaves iterator intact,
741  // unless the key in the value it points to is modified/deleted
743  friend class hash_table;
744 
745  /// variables
746 
747  protected:
748  // the hash table of this iterator
750  // iterator points to current element in some bucket
752 
753 
754  /// methods
755  protected:
756  // used by hash_table to create an iterator
758  : d_hash_table(hash_table), d_node(node)
759  { }
760 
761  public:
762  // public default constructor,
763  // leaves the iterator in undefined state.
765  : d_hash_table(NULL), d_node(NULL)
766  { }
767 
768  // copy constructor
770  : d_hash_table(other.d_hash_table), d_node(other.d_node)
771  { }
772 
773  // conversion constructor from non-const iterator
774  const_iterator(const iterator& other)
775  : d_hash_table(other.d_hash_table), d_node(other.d_node)
776  { }
777 
778  // assignment
780  if (this != &other) {
781  d_hash_table = other.d_hash_table;
782  d_node = other.d_node;
783  }
784 
785  return *this;
786  }
787 
788  // go to next data - pre-increment
790  // must not be the end iterator
791  DebugAssert(d_node != NULL, "");
792 
793  // get current bucket index
795 
796  // go to next entry in bucket
797  d_node = d_node->d_next;
798 
799  // while current bucket empty
800  while (d_node == NULL) {
801  // go to next bucket
802  ++index;
803 
804  // all buckets exhausted
805  if (index == d_hash_table->d_data.size()) {
806  // unfortunately this does not work, as end() returns a tmp value
807  // return d_hash_table->end();
808  *this = d_hash_table->end();
809  return *this;
810  }
811  DebugAssert(index < d_hash_table->d_data.size(),"");
812 
814  }
815 
816  return *this;
817  };
818 
819  // go to next data - post-increment
821  const_iterator tmp = *this;
822  ++(*this);
823  return tmp;
824  }
825 
826  const value_type& operator*() const {
827  return d_node->d_value;
828  }
829 
830  const value_type* operator->() const {
831  return &(operator*());
832  }
833 
834  // are two iterator identical?
835  bool operator==(const const_iterator& other) const {
836  // if the same bucket iterator, then it must be the same hash table
837  DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table,"");
838  return (d_node == other.d_node);
839  }
840 
841  // negation of ==
842  bool operator!=(const const_iterator& other) const {
843  return !(*this == other);
844  }
845  };
846  };
847 
848 }
849 
850 #endif