ScummVM API documentation
hashmap.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 // The hash map (associative array) implementation in this file is
23 // based on the PyDict implementation of CPython.
24 
25 #ifndef COMMON_HASHMAP_H
26 #define COMMON_HASHMAP_H
27 
28 // Enable the following #define if you want to check how many collisions the
29 // code produces (many collisions indicate either a bad hash function, or a
30 // hash table that is too small).
31 
32 //#define DEBUG_HASH_COLLISIONS
33 
39 #define USE_HASHMAP_MEMORY_POOL
40 
41 
42 #include "common/func.h"
43 
44 #include "common/str.h"
45 
46 #ifdef DEBUG_HASH_COLLISIONS
47 #include "common/debug.h"
48 #endif
49 
50 #ifdef USE_HASHMAP_MEMORY_POOL
51 #include "common/memorypool.h"
52 #endif
53 
54 namespace Common {
55 
65 // The Intel C++ Compiler has difficulties with nested templates.
66 #if defined(__INTEL_COMPILER)
67 template<class T> class IteratorImpl;
68 #endif
69 
70 
84 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
85 class HashMap {
86 public:
87  typedef uint size_type;
88 
89  struct Node {
90  Val _value;
91  const Key _key;
92  explicit Node(const Key &key) : _key(key), _value() {}
93  Node() : _key(), _value() {}
94  };
95 
96 private:
97 
99 
100  enum {
101  HASHMAP_PERTURB_SHIFT = 5,
102  HASHMAP_MIN_CAPACITY = 16,
103 
104  // The quotient of the next two constants controls how much the
105  // internal storage of the hashmap may fill up before being
106  // increased automatically.
107  // Note: the quotient of these two must be between and different
108  // from 0 and 1.
109  HASHMAP_LOADFACTOR_NUMERATOR = 2,
110  HASHMAP_LOADFACTOR_DENOMINATOR = 3,
111 
112  HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
113  };
114 
115 #ifdef USE_HASHMAP_MEMORY_POOL
117 #endif
118 
120  Val _defaultVal;
121 
122  Node **_storage;
123  size_type _mask;
124  size_type _size;
125  size_type _deleted;
126 
127  HashFunc _hash;
128  EqualFunc _equal;
129 
131  #define HASHMAP_DUMMY_NODE ((Node *)1)
132 
133 #ifdef DEBUG_HASH_COLLISIONS
134  mutable int _collisions, _lookups, _dummyHits;
135 #endif
136 
137  Node *allocNode(const Key &key) {
138 #ifdef USE_HASHMAP_MEMORY_POOL
139  return new (_nodePool) Node(key);
140 #else
141  return new Node(key);
142 #endif
143  }
144 
145  void freeNode(Node *node) {
146  if (node && node != HASHMAP_DUMMY_NODE)
147 #ifdef USE_HASHMAP_MEMORY_POOL
148  _nodePool.deleteChunk(node);
149 #else
150  delete node;
151 #endif
152  }
153 
154  void assign(const HM_t &map);
155  size_type lookup(const Key &key) const;
156  size_type lookupAndCreateIfMissing(const Key &key);
157  void expandStorage(size_type newCapacity);
158 
159  template<class T> friend class IteratorImpl;
160 
164  template<class NodeType>
165  class IteratorImpl {
166  friend class HashMap;
167 #if defined(__INTEL_COMPILER)
168  template<class T> friend class Common::IteratorImpl;
169 #else
170  template<class T> friend class IteratorImpl;
171 #endif
172  protected:
173  typedef const HashMap hashmap_t;
174 
175  size_type _idx;
176  hashmap_t *_hashmap;
177 
178  protected:
179  IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
180 
181  NodeType *deref() const {
182  assert(_hashmap != nullptr);
183  assert(_idx <= _hashmap->_mask);
184  Node *node = _hashmap->_storage[_idx];
185  assert(node != nullptr);
186  assert(node != HASHMAP_DUMMY_NODE);
187  return node;
188  }
189 
190  public:
191  IteratorImpl() : _idx(0), _hashmap(nullptr) {}
192  template<class T>
193  IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
194 
195  NodeType &operator*() const { return *deref(); }
196  NodeType *operator->() const { return deref(); }
197 
198  bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
199  bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
200 
201  IteratorImpl &operator++() {
202  assert(_hashmap);
203  do {
204  _idx++;
205  } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE));
206  if (_idx > _hashmap->_mask)
207  _idx = (size_type)-1;
208 
209  return *this;
210  }
211 
212  IteratorImpl operator++(int) {
213  IteratorImpl old = *this;
214  operator ++();
215  return old;
216  }
217  };
218 
219 public:
220  typedef IteratorImpl<Node> iterator;
221  typedef IteratorImpl<const Node> const_iterator;
222 
223  HashMap();
224  HashMap(const HM_t &map);
225  ~HashMap();
226 
227  HM_t &operator=(const HM_t &map) {
228  if (this == &map)
229  return *this;
230 
231  // Remove the previous content and ...
232  clear();
233  delete[] _storage;
234  // ... copy the new stuff.
235  assign(map);
236  return *this;
237  }
238 
239  bool contains(const Key &key) const;
240 
241  Val &operator[](const Key &key);
242  const Val &operator[](const Key &key) const;
243 
244  Val &getOrCreateVal(const Key &key);
245  Val &getVal(const Key &key);
246  const Val &getVal(const Key &key) const;
247  const Val &getValOrDefault(const Key &key) const;
248  const Val &getValOrDefault(const Key &key, const Val &defaultVal) const;
249  bool tryGetVal(const Key &key, Val &out) const;
250  void setVal(const Key &key, const Val &val);
251 
252  void clear(bool shrinkArray = 0);
253 
254  void erase(iterator entry);
255  void erase(const Key &key);
256 
257  size_type size() const { return _size; }
258 
259  iterator begin() {
260  // Find and return the first non-empty entry
261  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
262  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
263  return iterator(ctr, this);
264  }
265  return end();
266  }
267  iterator end() {
268  return iterator((size_type)-1, this);
269  }
270 
271  const_iterator begin() const {
272  // Find and return the first non-empty entry
273  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
274  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
275  return const_iterator(ctr, this);
276  }
277  return end();
278  }
279  const_iterator end() const {
280  return const_iterator((size_type)-1, this);
281  }
282 
283  iterator find(const Key &key) {
284  size_type ctr = lookup(key);
285  if (_storage[ctr])
286  return iterator(ctr, this);
287  return end();
288  }
289 
290  const_iterator find(const Key &key) const {
291  size_type ctr = lookup(key);
292  if (_storage[ctr])
293  return const_iterator(ctr, this);
294  return end();
295  }
296 
297  // TODO: insert() method?
299  bool empty() const {
300  return (_size == 0);
301  }
302 };
303 
304 template <class Key>
305 void NORETURN_PRE unknownKeyError(Key k) NORETURN_POST {
306  error("Unknown key");
307 }
308 
309 template<>
310 void NORETURN_PRE unknownKeyError(::Common::String key) NORETURN_POST;
311 template<>
312 void NORETURN_PRE unknownKeyError(signed char key) NORETURN_POST;
313 template<>
314 void NORETURN_PRE unknownKeyError(unsigned char key) NORETURN_POST;
315 template<>
316 void NORETURN_PRE unknownKeyError(short signed key) NORETURN_POST;
317 template<>
318 void NORETURN_PRE unknownKeyError(short unsigned key) NORETURN_POST;
319 template<>
320 void NORETURN_PRE unknownKeyError(long signed key) NORETURN_POST;
321 template<>
322 void NORETURN_PRE unknownKeyError(long unsigned key) NORETURN_POST;
323 template<>
324 void NORETURN_PRE unknownKeyError(signed int key) NORETURN_POST;
325 template<>
326 void NORETURN_PRE unknownKeyError(unsigned int key) NORETURN_POST;
327 template<>
328 void NORETURN_PRE unknownKeyError(long long signed key) NORETURN_POST;
329 template<>
330 void NORETURN_PRE unknownKeyError(long long unsigned key) NORETURN_POST;
331 template<>
332 void NORETURN_PRE unknownKeyError(void *key) NORETURN_POST;
333 template<>
334 void NORETURN_PRE unknownKeyError(const char *key) NORETURN_POST;
335 
336 //-------------------------------------------------------
337 // HashMap functions
338 
342 template<class Key, class Val, class HashFunc, class EqualFunc>
344  _mask = HASHMAP_MIN_CAPACITY - 1;
345  _storage = new Node *[HASHMAP_MIN_CAPACITY];
346  assert(_storage != nullptr);
347  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
348 
349  _size = 0;
350  _deleted = 0;
351 
352 #ifdef DEBUG_HASH_COLLISIONS
353  _collisions = 0;
354  _lookups = 0;
355  _dummyHits = 0;
356 #endif
357 }
358 
364 template<class Key, class Val, class HashFunc, class EqualFunc>
366  _defaultVal() {
367 #ifdef DEBUG_HASH_COLLISIONS
368  _collisions = 0;
369  _lookups = 0;
370  _dummyHits = 0;
371 #endif
372  assign(map);
373 }
374 
378 template<class Key, class Val, class HashFunc, class EqualFunc>
380  for (size_type ctr = 0; ctr <= _mask; ++ctr)
381  freeNode(_storage[ctr]);
382 
383  delete[] _storage;
384 #ifdef DEBUG_HASH_COLLISIONS
385  extern void updateHashCollisionStats(int, int, int, int, int);
386  updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size);
387 #endif
388 }
389 
397 template<class Key, class Val, class HashFunc, class EqualFunc>
399  _mask = map._mask;
400  _storage = new Node *[_mask + 1];
401  assert(_storage != nullptr);
402  memset(_storage, 0, (_mask + 1) * sizeof(Node *));
403 
404  // Simply clone the map given to us, one by one.
405  _size = 0;
406  _deleted = 0;
407  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
408  if (map._storage[ctr] == HASHMAP_DUMMY_NODE) {
409  _storage[ctr] = HASHMAP_DUMMY_NODE;
410  _deleted++;
411  } else if (map._storage[ctr] != nullptr) {
412  _storage[ctr] = allocNode(map._storage[ctr]->_key);
413  _storage[ctr]->_value = map._storage[ctr]->_value;
414  _size++;
415  }
416  }
417  // Perform a sanity check (to help track down hashmap corruption)
418  assert(_size == map._size);
419  assert(_deleted == map._deleted);
420 }
421 
426 template<class Key, class Val, class HashFunc, class EqualFunc>
428  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
429  freeNode(_storage[ctr]);
430  _storage[ctr] = nullptr;
431  }
432 
433 #ifdef USE_HASHMAP_MEMORY_POOL
434  _nodePool.freeUnusedPages();
435 #endif
436 
437  if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
438  delete[] _storage;
439 
440  _mask = HASHMAP_MIN_CAPACITY - 1;
441  _storage = new Node *[HASHMAP_MIN_CAPACITY];
442  assert(_storage != nullptr);
443  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
444  }
445 
446  _size = 0;
447  _deleted = 0;
448 }
449 
450 template<class Key, class Val, class HashFunc, class EqualFunc>
451 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(size_type newCapacity) {
452  assert(newCapacity > _mask + 1);
453 
454 #ifndef NDEBUG
455  const size_type old_size = _size;
456 #endif
457  const size_type old_mask = _mask;
458  Node **old_storage = _storage;
459 
460  // allocate a new array
461  _size = 0;
462  _deleted = 0;
463  _mask = newCapacity - 1;
464  _storage = new Node *[newCapacity];
465  assert(_storage != nullptr);
466  memset(_storage, 0, newCapacity * sizeof(Node *));
467 
468  // rehash all the old elements
469  for (size_type ctr = 0; ctr <= old_mask; ++ctr) {
470  if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE)
471  continue;
472 
473  // Insert the element from the old table into the new table.
474  // Since we know that no key exists twice in the old table, we
475  // can do this slightly better than by calling lookup, since we
476  // don't have to call _equal().
477  const size_type hash = _hash(old_storage[ctr]->_key);
478  size_type idx = hash & _mask;
479  for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) {
480  idx = (5 * idx + perturb + 1) & _mask;
481  }
482 
483  _storage[idx] = old_storage[ctr];
484  _size++;
485  }
486 
487  // Perform a sanity check: Old number of elements should match the new one!
488  // This check will fail if some previous operation corrupted this hashmap.
489  assert(_size == old_size);
490 
491  delete[] old_storage;
492 
493  return;
494 }
495 
496 template<class Key, class Val, class HashFunc, class EqualFunc>
497 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
498  const size_type hash = _hash(key);
499  size_type ctr = hash & _mask;
500  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
501  if (_storage[ctr] == nullptr)
502  break;
503  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
504 #ifdef DEBUG_HASH_COLLISIONS
505  _dummyHits++;
506 #endif
507  } else if (_equal(_storage[ctr]->_key, key))
508  break;
509 
510  ctr = (5 * ctr + perturb + 1) & _mask;
511 
512 #ifdef DEBUG_HASH_COLLISIONS
513  _collisions++;
514 #endif
515  }
516 
517 #ifdef DEBUG_HASH_COLLISIONS
518  _lookups++;
519  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
520  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
521  (const void *)this, _mask + 1, _size);
522 #endif
523 
524  return ctr;
525 }
526 
527 template<class Key, class Val, class HashFunc, class EqualFunc>
528 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
529  const size_type hash = _hash(key);
530  size_type ctr = hash & _mask;
531  const size_type NONE_FOUND = _mask + 1;
532  size_type first_free = NONE_FOUND;
533  bool found = false;
534  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
535  if (_storage[ctr] == nullptr)
536  break;
537  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
538 #ifdef DEBUG_HASH_COLLISIONS
539  _dummyHits++;
540 #endif
541  if (first_free == NONE_FOUND)
542  first_free = ctr;
543  } else if (_equal(_storage[ctr]->_key, key)) {
544  found = true;
545  break;
546  }
547 
548  ctr = (5 * ctr + perturb + 1) & _mask;
549 
550 #ifdef DEBUG_HASH_COLLISIONS
551  _collisions++;
552 #endif
553  }
554 
555 #ifdef DEBUG_HASH_COLLISIONS
556  _lookups++;
557  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
558  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
559  (const void *)this, _mask + 1, _size);
560 #endif
561 
562  if (!found && first_free != NONE_FOUND)
563  ctr = first_free;
564 
565  if (!found) {
566  if (_storage[ctr])
567  _deleted--;
568  _storage[ctr] = allocNode(key);
569  assert(_storage[ctr] != nullptr);
570  _size++;
571 
572  // Keep the load factor below a certain threshold.
573  // Deleted nodes are also counted
574  size_type capacity = _mask + 1;
575  if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
576  capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
577  capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
578  expandStorage(capacity);
579  ctr = lookup(key);
580  assert(_storage[ctr] != nullptr);
581  }
582  }
583 
584  return ctr;
585 }
586 
591 template<class Key, class Val, class HashFunc, class EqualFunc>
593  size_type ctr = lookup(key);
594  return (_storage[ctr] != nullptr);
595 }
596 
601 template<class Key, class Val, class HashFunc, class EqualFunc>
603  return getOrCreateVal(key);
604 }
605 
610 template<class Key, class Val, class HashFunc, class EqualFunc>
611 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
612  return getVal(key);
613 }
614 
619 template<class Key, class Val, class HashFunc, class EqualFunc>
621  size_type ctr = lookupAndCreateIfMissing(key);
622  assert(_storage[ctr] != nullptr);
623  return _storage[ctr]->_value;
624 }
625 
630 template<class Key, class Val, class HashFunc, class EqualFunc>
632  size_type ctr = lookup(key);
633  if (_storage[ctr] != nullptr)
634  return _storage[ctr]->_value;
635  else
636  // In the past getVal() and operator[] used to return the default value for this case.
637  // Clarifying the intent by using getValOrDefault() when we query a key that may not be
638  // present is a good idea, but we have a lot of legacy code that may need to be updated.
639  // So for now only returns an error in non-release builds. Once we are confident all the
640  // code has been updated to use the correct function we can remove the RELEASE_BUILD
641  // special case.
642 #ifdef RELEASE_BUILD
643  return _defaultVal;
644 #else
645  unknownKeyError(key);
646 #endif
647 }
648 
649 template<class Key, class Val, class HashFunc, class EqualFunc>
650 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
651  size_type ctr = lookup(key);
652  if (_storage[ctr] != nullptr)
653  return _storage[ctr]->_value;
654  else
655  // See comment in non-const getVal() above.
656 #ifdef RELEASE_BUILD
657  return _defaultVal;
658 #else
659  unknownKeyError(key);
660 #endif
661 }
662 
663 template<class Key, class Val, class HashFunc, class EqualFunc>
664 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key) const {
665  return getValOrDefault(key, _defaultVal);
666 }
667 
672 template<class Key, class Val, class HashFunc, class EqualFunc>
673 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key, const Val &defaultVal) const {
674  size_type ctr = lookup(key);
675  if (_storage[ctr] != nullptr)
676  return _storage[ctr]->_value;
677  else
678  return defaultVal;
679 }
680 
685 template<class Key, class Val, class HashFunc, class EqualFunc>
686 bool HashMap<Key, Val, HashFunc, EqualFunc>::tryGetVal(const Key &key, Val &out) const {
687  size_type ctr = lookup(key);
688  if (_storage[ctr] != nullptr) {
689  out = _storage[ctr]->_value;
690  return true;
691  } else {
692  return false;
693  }
694 }
695 
696 template<class Key, class Val, class HashFunc, class EqualFunc>
697 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
698  size_type ctr = lookupAndCreateIfMissing(key);
699  assert(_storage[ctr] != nullptr);
700  _storage[ctr]->_value = val;
701 }
702 
707 template<class Key, class Val, class HashFunc, class EqualFunc>
709  // Check whether we have a valid iterator
710  assert(entry._hashmap == this);
711  const size_type ctr = entry._idx;
712  assert(ctr <= _mask);
713  Node * const node = _storage[ctr];
714  assert(node != NULL);
715  assert(node != HASHMAP_DUMMY_NODE);
716 
717  // If we remove a key, we replace it with a dummy node.
718  freeNode(node);
719  _storage[ctr] = HASHMAP_DUMMY_NODE;
720  _size--;
721  _deleted++;
722 }
723 
728 template<class Key, class Val, class HashFunc, class EqualFunc>
730 
731  size_type ctr = lookup(key);
732  if (_storage[ctr] == nullptr)
733  return;
734 
735  // If we remove a key, we replace it with a dummy node.
736  freeNode(_storage[ctr]);
737  _storage[ctr] = HASHMAP_DUMMY_NODE;
738  _size--;
739  _deleted++;
740  return;
741 }
742 
743 #undef HASHMAP_DUMMY_NODE
744 
747 } // End of namespace Common
748 
749 #endif
void clear(bool shrinkArray=0)
Definition: hashmap.h:427
Definition: str.h:59
Val & getVal(const Key &key)
Definition: hashmap.h:631
Val & operator[](const Key &key)
Definition: hashmap.h:602
In find(In first, In last, const T &v)
Definition: algorithm.h:168
void debug(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
Definition: hashmap.h:85
void deleteChunk(T *ptr)
Definition: memorypool.h:145
Definition: algorithm.h:29
Definition: memorypool.h:139
bool empty() const
Definition: hashmap.h:299
bool tryGetVal(const Key &key, Val &out) const
Definition: hashmap.h:686
bool contains(const Key &key) const
Definition: hashmap.h:592
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
void erase(iterator entry)
Definition: hashmap.h:708
HashMap()
Definition: hashmap.h:343
~HashMap()
Definition: hashmap.h:379
Val & getOrCreateVal(const Key &key)
Definition: hashmap.h:620
Definition: lobject.h:332
Definition: hashmap.h:89