ScummVM API documentation
hashmap.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 // The hash map (associative array) implementation in this file is
23 // based on the PyDict implementation of CPython.
24 
25 #ifndef COMMON_HASHMAP_H
26 #define COMMON_HASHMAP_H
27 
28 // Enable the following #define if you want to check how many collisions the
29 // code produces (many collisions indicate either a bad hash function, or a
30 // hash table that is too small).
31 
32 //#define DEBUG_HASH_COLLISIONS
33 
39 #define USE_HASHMAP_MEMORY_POOL
40 
41 
42 #include "common/func.h"
43 
44 #include "common/str.h"
45 
46 #ifdef DEBUG_HASH_COLLISIONS
47 #include "common/debug.h"
48 #endif
49 
50 #ifdef USE_HASHMAP_MEMORY_POOL
51 #include "common/memorypool.h"
52 #endif
53 
54 namespace Common {
55 
65 // The Intel C++ Compiler has difficulties with nested templates.
66 #if defined(__INTEL_COMPILER)
67 template<class T> class IteratorImpl;
68 #endif
69 
70 
84 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
85 class HashMap {
86 public:
87  typedef uint size_type;
88 
89  struct Node {
90  Val _value;
91  const Key _key;
92  explicit Node(const Key &key) : _key(key), _value() {}
93  Node() : _key(), _value() {}
94  };
95 
96 private:
97 
99 
100  enum {
101  HASHMAP_PERTURB_SHIFT = 5,
102  HASHMAP_MIN_CAPACITY = 16,
103 
104  // The quotient of the next two constants controls how much the
105  // internal storage of the hashmap may fill up before being
106  // increased automatically.
107  // Note: the quotient of these two must be between and different
108  // from 0 and 1.
109  HASHMAP_LOADFACTOR_NUMERATOR = 2,
110  HASHMAP_LOADFACTOR_DENOMINATOR = 3,
111 
112 #ifdef REDUCE_MEMORY_USAGE
113  HASHMAP_MEMORYPOOL_SIZE = 0
114 #else
115  HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
116 #endif
117  };
118 
119 #ifdef USE_HASHMAP_MEMORY_POOL
121 #endif
122 
124  Val _defaultVal;
125 
126  Node **_storage;
127  size_type _mask;
128  size_type _size;
129  size_type _deleted;
130 
131  HashFunc _hash;
132  EqualFunc _equal;
133 
135  #define HASHMAP_DUMMY_NODE ((Node *)1)
136 
137 #ifdef DEBUG_HASH_COLLISIONS
138  mutable int _collisions, _lookups, _dummyHits;
139 #endif
140 
141  Node *allocNode(const Key &key) {
142 #ifdef USE_HASHMAP_MEMORY_POOL
143  return new (_nodePool) Node(key);
144 #else
145  return new Node(key);
146 #endif
147  }
148 
149  void freeNode(Node *node) {
150  if (node && node != HASHMAP_DUMMY_NODE)
151 #ifdef USE_HASHMAP_MEMORY_POOL
152  _nodePool.deleteChunk(node);
153 #else
154  delete node;
155 #endif
156  }
157 
158  void assign(const HM_t &map);
159  size_type lookup(const Key &key) const;
160  size_type lookupAndCreateIfMissing(const Key &key);
161  void expandStorage(size_type newCapacity);
162 
163  template<class T> friend class IteratorImpl;
164 
168  template<class NodeType>
169  class IteratorImpl {
170  friend class HashMap;
171 #if defined(__INTEL_COMPILER)
172  template<class T> friend class Common::IteratorImpl;
173 #else
174  template<class T> friend class IteratorImpl;
175 #endif
176  protected:
177  typedef const HashMap hashmap_t;
178 
179  size_type _idx;
180  hashmap_t *_hashmap;
181 
182  protected:
183  IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
184 
185  NodeType *deref() const {
186  assert(_hashmap != nullptr);
187  assert(_idx <= _hashmap->_mask);
188  Node *node = _hashmap->_storage[_idx];
189  assert(node != nullptr);
190  assert(node != HASHMAP_DUMMY_NODE);
191  return node;
192  }
193 
194  public:
195  IteratorImpl() : _idx(0), _hashmap(nullptr) {}
196  template<class T>
197  IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
198 
199  NodeType &operator*() const { return *deref(); }
200  NodeType *operator->() const { return deref(); }
201 
202  bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
203  bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
204 
205  IteratorImpl &operator++() {
206  assert(_hashmap);
207  do {
208  _idx++;
209  } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE));
210  if (_idx > _hashmap->_mask)
211  _idx = (size_type)-1;
212 
213  return *this;
214  }
215 
216  IteratorImpl operator++(int) {
217  IteratorImpl old = *this;
218  operator ++();
219  return old;
220  }
221  };
222 
223 public:
224  typedef IteratorImpl<Node> iterator;
225  typedef IteratorImpl<const Node> const_iterator;
226 
227  HashMap();
228  HashMap(const HM_t &map);
229  ~HashMap();
230 
231  HM_t &operator=(const HM_t &map) {
232  if (this == &map)
233  return *this;
234 
235  // Remove the previous content and ...
236  clear();
237  delete[] _storage;
238  // ... copy the new stuff.
239  assign(map);
240  return *this;
241  }
242 
243  bool contains(const Key &key) const;
244 
245  Val &operator[](const Key &key);
246  const Val &operator[](const Key &key) const;
247 
248  Val &getOrCreateVal(const Key &key);
249  Val &getVal(const Key &key);
250  const Val &getVal(const Key &key) const;
251  const Val &getValOrDefault(const Key &key) const;
252  const Val &getValOrDefault(const Key &key, const Val &defaultVal) const;
253  bool tryGetVal(const Key &key, Val &out) const;
254  void setVal(const Key &key, const Val &val);
255 
256  void clear(bool shrinkArray = 0);
257 
258  void erase(iterator entry);
259  void erase(const Key &key);
260 
261  size_type size() const { return _size; }
262 
263  iterator begin() {
264  // Find and return the first non-empty entry
265  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
266  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
267  return iterator(ctr, this);
268  }
269  return end();
270  }
271  iterator end() {
272  return iterator((size_type)-1, this);
273  }
274 
275  const_iterator begin() const {
276  // Find and return the first non-empty entry
277  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
278  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
279  return const_iterator(ctr, this);
280  }
281  return end();
282  }
283  const_iterator end() const {
284  return const_iterator((size_type)-1, this);
285  }
286 
287  iterator find(const Key &key) {
288  size_type ctr = lookup(key);
289  if (_storage[ctr])
290  return iterator(ctr, this);
291  return end();
292  }
293 
294  const_iterator find(const Key &key) const {
295  size_type ctr = lookup(key);
296  if (_storage[ctr])
297  return const_iterator(ctr, this);
298  return end();
299  }
300 
301  // TODO: insert() method?
303  bool empty() const {
304  return (_size == 0);
305  }
306 };
307 
308 template <class Key>
309 void NORETURN_PRE unknownKeyError(Key k) NORETURN_POST {
310  error("Unknown key");
311 }
312 
313 template<>
314 void NORETURN_PRE unknownKeyError(::Common::String key) NORETURN_POST;
315 template<>
316 void NORETURN_PRE unknownKeyError(signed char key) NORETURN_POST;
317 template<>
318 void NORETURN_PRE unknownKeyError(unsigned char key) NORETURN_POST;
319 template<>
320 void NORETURN_PRE unknownKeyError(short signed key) NORETURN_POST;
321 template<>
322 void NORETURN_PRE unknownKeyError(short unsigned key) NORETURN_POST;
323 template<>
324 void NORETURN_PRE unknownKeyError(long signed key) NORETURN_POST;
325 template<>
326 void NORETURN_PRE unknownKeyError(long unsigned key) NORETURN_POST;
327 template<>
328 void NORETURN_PRE unknownKeyError(signed int key) NORETURN_POST;
329 template<>
330 void NORETURN_PRE unknownKeyError(unsigned int key) NORETURN_POST;
331 template<>
332 void NORETURN_PRE unknownKeyError(long long signed key) NORETURN_POST;
333 template<>
334 void NORETURN_PRE unknownKeyError(long long unsigned key) NORETURN_POST;
335 template<>
336 void NORETURN_PRE unknownKeyError(void *key) NORETURN_POST;
337 template<>
338 void NORETURN_PRE unknownKeyError(const char *key) NORETURN_POST;
339 
340 //-------------------------------------------------------
341 // HashMap functions
342 
346 template<class Key, class Val, class HashFunc, class EqualFunc>
348  _mask = HASHMAP_MIN_CAPACITY - 1;
349  _storage = new Node *[HASHMAP_MIN_CAPACITY];
350  assert(_storage != nullptr);
351  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
352 
353  _size = 0;
354  _deleted = 0;
355 
356 #ifdef DEBUG_HASH_COLLISIONS
357  _collisions = 0;
358  _lookups = 0;
359  _dummyHits = 0;
360 #endif
361 }
362 
368 template<class Key, class Val, class HashFunc, class EqualFunc>
370  _defaultVal() {
371 #ifdef DEBUG_HASH_COLLISIONS
372  _collisions = 0;
373  _lookups = 0;
374  _dummyHits = 0;
375 #endif
376  assign(map);
377 }
378 
382 template<class Key, class Val, class HashFunc, class EqualFunc>
384  for (size_type ctr = 0; ctr <= _mask; ++ctr)
385  freeNode(_storage[ctr]);
386 
387  delete[] _storage;
388 #ifdef DEBUG_HASH_COLLISIONS
389  extern void updateHashCollisionStats(int, int, int, int, int);
390  updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size);
391 #endif
392 }
393 
401 template<class Key, class Val, class HashFunc, class EqualFunc>
403  _mask = map._mask;
404  _storage = new Node *[_mask + 1];
405  assert(_storage != nullptr);
406  memset(_storage, 0, (_mask + 1) * sizeof(Node *));
407 
408  // Simply clone the map given to us, one by one.
409  _size = 0;
410  _deleted = 0;
411  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
412  if (map._storage[ctr] == HASHMAP_DUMMY_NODE) {
413  _storage[ctr] = HASHMAP_DUMMY_NODE;
414  _deleted++;
415  } else if (map._storage[ctr] != nullptr) {
416  _storage[ctr] = allocNode(map._storage[ctr]->_key);
417  _storage[ctr]->_value = map._storage[ctr]->_value;
418  _size++;
419  }
420  }
421  // Perform a sanity check (to help track down hashmap corruption)
422  assert(_size == map._size);
423  assert(_deleted == map._deleted);
424 }
425 
430 template<class Key, class Val, class HashFunc, class EqualFunc>
432  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
433  freeNode(_storage[ctr]);
434  _storage[ctr] = nullptr;
435  }
436 
437 #ifdef USE_HASHMAP_MEMORY_POOL
438  _nodePool.freeUnusedPages();
439 #endif
440 
441  if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
442  delete[] _storage;
443 
444  _mask = HASHMAP_MIN_CAPACITY - 1;
445  _storage = new Node *[HASHMAP_MIN_CAPACITY];
446  assert(_storage != nullptr);
447  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
448  }
449 
450  _size = 0;
451  _deleted = 0;
452 }
453 
454 template<class Key, class Val, class HashFunc, class EqualFunc>
455 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(size_type newCapacity) {
456  assert(newCapacity > _mask + 1);
457 
458 #ifndef RELEASE_BUILD
459  const size_type old_size = _size;
460 #endif
461  const size_type old_mask = _mask;
462  Node **old_storage = _storage;
463 
464  // allocate a new array
465  _size = 0;
466  _deleted = 0;
467  _mask = newCapacity - 1;
468  _storage = new Node *[newCapacity];
469  assert(_storage != nullptr);
470  memset(_storage, 0, newCapacity * sizeof(Node *));
471 
472  // rehash all the old elements
473  for (size_type ctr = 0; ctr <= old_mask; ++ctr) {
474  if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE)
475  continue;
476 
477  // Insert the element from the old table into the new table.
478  // Since we know that no key exists twice in the old table, we
479  // can do this slightly better than by calling lookup, since we
480  // don't have to call _equal().
481  const size_type hash = _hash(old_storage[ctr]->_key);
482  size_type idx = hash & _mask;
483  for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) {
484  idx = (5 * idx + perturb + 1) & _mask;
485  }
486 
487  _storage[idx] = old_storage[ctr];
488  _size++;
489  }
490 
491 #ifndef RELEASE_BUILD
492  // Perform a sanity check: Old number of elements should match the new one!
493  // This check will fail if some previous operation corrupted this hashmap.
494  assert(_size == old_size);
495 #endif
496 
497  delete[] old_storage;
498 
499  return;
500 }
501 
502 template<class Key, class Val, class HashFunc, class EqualFunc>
503 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
504  const size_type hash = _hash(key);
505  size_type ctr = hash & _mask;
506  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
507  if (_storage[ctr] == nullptr)
508  break;
509  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
510 #ifdef DEBUG_HASH_COLLISIONS
511  _dummyHits++;
512 #endif
513  } else if (_equal(_storage[ctr]->_key, key))
514  break;
515 
516  ctr = (5 * ctr + perturb + 1) & _mask;
517 
518 #ifdef DEBUG_HASH_COLLISIONS
519  _collisions++;
520 #endif
521  }
522 
523 #ifdef DEBUG_HASH_COLLISIONS
524  _lookups++;
525  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
526  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
527  (const void *)this, _mask + 1, _size);
528 #endif
529 
530  return ctr;
531 }
532 
533 template<class Key, class Val, class HashFunc, class EqualFunc>
534 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
535  const size_type hash = _hash(key);
536  size_type ctr = hash & _mask;
537  const size_type NONE_FOUND = _mask + 1;
538  size_type first_free = NONE_FOUND;
539  bool found = false;
540  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
541  if (_storage[ctr] == nullptr)
542  break;
543  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
544 #ifdef DEBUG_HASH_COLLISIONS
545  _dummyHits++;
546 #endif
547  if (first_free == NONE_FOUND)
548  first_free = ctr;
549  } else if (_equal(_storage[ctr]->_key, key)) {
550  found = true;
551  break;
552  }
553 
554  ctr = (5 * ctr + perturb + 1) & _mask;
555 
556 #ifdef DEBUG_HASH_COLLISIONS
557  _collisions++;
558 #endif
559  }
560 
561 #ifdef DEBUG_HASH_COLLISIONS
562  _lookups++;
563  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
564  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
565  (const void *)this, _mask + 1, _size);
566 #endif
567 
568  if (!found && first_free != NONE_FOUND)
569  ctr = first_free;
570 
571  if (!found) {
572  if (_storage[ctr])
573  _deleted--;
574  _storage[ctr] = allocNode(key);
575  assert(_storage[ctr] != nullptr);
576  _size++;
577 
578  // Keep the load factor below a certain threshold.
579  // Deleted nodes are also counted
580  size_type capacity = _mask + 1;
581  if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
582  capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
583  capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
584  expandStorage(capacity);
585  ctr = lookup(key);
586  assert(_storage[ctr] != nullptr);
587  }
588  }
589 
590  return ctr;
591 }
592 
597 template<class Key, class Val, class HashFunc, class EqualFunc>
599  size_type ctr = lookup(key);
600  return (_storage[ctr] != nullptr);
601 }
602 
607 template<class Key, class Val, class HashFunc, class EqualFunc>
609  return getOrCreateVal(key);
610 }
611 
616 template<class Key, class Val, class HashFunc, class EqualFunc>
617 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
618  return getVal(key);
619 }
620 
625 template<class Key, class Val, class HashFunc, class EqualFunc>
627  size_type ctr = lookupAndCreateIfMissing(key);
628  assert(_storage[ctr] != nullptr);
629  return _storage[ctr]->_value;
630 }
631 
636 template<class Key, class Val, class HashFunc, class EqualFunc>
638  size_type ctr = lookup(key);
639  if (_storage[ctr] != nullptr)
640  return _storage[ctr]->_value;
641  else
642  // In the past getVal() and operator[] used to return the default value for this case.
643  // Clarifying the intent by using getValOrDefault() when we query a key that may not be
644  // present is a good idea, but we have a lot of legacy code that may need to be updated.
645  // So for now only returns an error in non-release builds. Once we are confident all the
646  // code has been updated to use the correct function we can remove the RELEASE_BUILD
647  // special case.
648 #ifdef RELEASE_BUILD
649  return _defaultVal;
650 #else
651  unknownKeyError(key);
652 #endif
653 }
654 
655 template<class Key, class Val, class HashFunc, class EqualFunc>
656 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
657  size_type ctr = lookup(key);
658  if (_storage[ctr] != nullptr)
659  return _storage[ctr]->_value;
660  else
661  // See comment in non-const getVal() above.
662 #ifdef RELEASE_BUILD
663  return _defaultVal;
664 #else
665  unknownKeyError(key);
666 #endif
667 }
668 
669 template<class Key, class Val, class HashFunc, class EqualFunc>
670 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key) const {
671  return getValOrDefault(key, _defaultVal);
672 }
673 
678 template<class Key, class Val, class HashFunc, class EqualFunc>
679 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key, const Val &defaultVal) const {
680  size_type ctr = lookup(key);
681  if (_storage[ctr] != nullptr)
682  return _storage[ctr]->_value;
683  else
684  return defaultVal;
685 }
686 
691 template<class Key, class Val, class HashFunc, class EqualFunc>
692 bool HashMap<Key, Val, HashFunc, EqualFunc>::tryGetVal(const Key &key, Val &out) const {
693  size_type ctr = lookup(key);
694  if (_storage[ctr] != nullptr) {
695  out = _storage[ctr]->_value;
696  return true;
697  } else {
698  return false;
699  }
700 }
701 
702 template<class Key, class Val, class HashFunc, class EqualFunc>
703 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
704  size_type ctr = lookupAndCreateIfMissing(key);
705  assert(_storage[ctr] != nullptr);
706  _storage[ctr]->_value = val;
707 }
708 
713 template<class Key, class Val, class HashFunc, class EqualFunc>
715  // Check whether we have a valid iterator
716  assert(entry._hashmap == this);
717  const size_type ctr = entry._idx;
718  assert(ctr <= _mask);
719  Node * const node = _storage[ctr];
720  assert(node != NULL);
721  assert(node != HASHMAP_DUMMY_NODE);
722 
723  // If we remove a key, we replace it with a dummy node.
724  freeNode(node);
725  _storage[ctr] = HASHMAP_DUMMY_NODE;
726  _size--;
727  _deleted++;
728 }
729 
734 template<class Key, class Val, class HashFunc, class EqualFunc>
736 
737  size_type ctr = lookup(key);
738  if (_storage[ctr] == nullptr)
739  return;
740 
741  // If we remove a key, we replace it with a dummy node.
742  freeNode(_storage[ctr]);
743  _storage[ctr] = HASHMAP_DUMMY_NODE;
744  _size--;
745  _deleted++;
746  return;
747 }
748 
749 #undef HASHMAP_DUMMY_NODE
750 
753 } // End of namespace Common
754 
755 #endif
void clear(bool shrinkArray=0)
Definition: hashmap.h:431
Definition: str.h:59
Val & getVal(const Key &key)
Definition: hashmap.h:637
Val & operator[](const Key &key)
Definition: hashmap.h:608
In find(In first, In last, const T &v)
Definition: algorithm.h:225
void debug(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
Definition: hashmap.h:85
void deleteChunk(T *ptr)
Definition: memorypool.h:149
Definition: algorithm.h:29
Definition: memorypool.h:143
bool empty() const
Definition: hashmap.h:303
bool tryGetVal(const Key &key, Val &out) const
Definition: hashmap.h:692
bool contains(const Key &key) const
Definition: hashmap.h:598
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
void erase(iterator entry)
Definition: hashmap.h:714
HashMap()
Definition: hashmap.h:347
~HashMap()
Definition: hashmap.h:383
Val & getOrCreateVal(const Key &key)
Definition: hashmap.h:626
Definition: lobject.h:332
Definition: hashmap.h:89