ScummVM API documentation
hashmap.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 // The hash map (associative array) implementation in this file is
23 // based on the PyDict implementation of CPython.
24 
25 #ifndef COMMON_HASHMAP_H
26 #define COMMON_HASHMAP_H
27 
28 // Enable the following #define if you want to check how many collisions the
29 // code produces (many collisions indicate either a bad hash function, or a
30 // hash table that is too small).
31 
32 //#define DEBUG_HASH_COLLISIONS
33 
39 #define USE_HASHMAP_MEMORY_POOL
40 
41 
42 #include "common/func.h"
43 
44 #include "common/str.h"
45 
46 #ifdef DEBUG_HASH_COLLISIONS
47 #include "common/debug.h"
48 #endif
49 
50 #ifdef USE_HASHMAP_MEMORY_POOL
51 #include "common/memorypool.h"
52 #endif
53 
54 namespace Common {
55 
65 // The Intel C++ Compiler has difficulties with nested templates.
66 #if defined(__INTEL_COMPILER)
67 template<class T> class IteratorImpl;
68 #endif
69 
70 
84 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
85 class HashMap {
86 public:
87  typedef uint size_type;
88 
89  struct Node {
90  Val _value;
91  const Key _key;
92  explicit Node(const Key &key) : _key(key), _value() {}
93  Node() : _key(), _value() {}
94  };
95 
96 private:
97 
99 
100  enum {
101  HASHMAP_PERTURB_SHIFT = 5,
102  HASHMAP_MIN_CAPACITY = 16,
103 
104  // The quotient of the next two constants controls how much the
105  // internal storage of the hashmap may fill up before being
106  // increased automatically.
107  // Note: the quotient of these two must be between and different
108  // from 0 and 1.
109  HASHMAP_LOADFACTOR_NUMERATOR = 2,
110  HASHMAP_LOADFACTOR_DENOMINATOR = 3,
111 
112  HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
113  };
114 
115 #ifdef USE_HASHMAP_MEMORY_POOL
117 #endif
118 
120  Val _defaultVal;
121 
122  Node **_storage;
123  size_type _mask;
124  size_type _size;
125  size_type _deleted;
126 
127  HashFunc _hash;
128  EqualFunc _equal;
129 
131  #define HASHMAP_DUMMY_NODE ((Node *)1)
132 
133 #ifdef DEBUG_HASH_COLLISIONS
134  mutable int _collisions, _lookups, _dummyHits;
135 #endif
136 
137  Node *allocNode(const Key &key) {
138 #ifdef USE_HASHMAP_MEMORY_POOL
139  return new (_nodePool) Node(key);
140 #else
141  return new Node(key);
142 #endif
143  }
144 
145  void freeNode(Node *node) {
146  if (node && node != HASHMAP_DUMMY_NODE)
147 #ifdef USE_HASHMAP_MEMORY_POOL
148  _nodePool.deleteChunk(node);
149 #else
150  delete node;
151 #endif
152  }
153 
154  void assign(const HM_t &map);
155  size_type lookup(const Key &key) const;
156  size_type lookupAndCreateIfMissing(const Key &key);
157  void expandStorage(size_type newCapacity);
158 
159  template<class T> friend class IteratorImpl;
160 
164  template<class NodeType>
165  class IteratorImpl {
166  friend class HashMap;
167 #if defined(__INTEL_COMPILER)
168  template<class T> friend class Common::IteratorImpl;
169 #else
170  template<class T> friend class IteratorImpl;
171 #endif
172  protected:
173  typedef const HashMap hashmap_t;
174 
175  size_type _idx;
176  hashmap_t *_hashmap;
177 
178  protected:
179  IteratorImpl(size_type idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
180 
181  NodeType *deref() const {
182  assert(_hashmap != nullptr);
183  assert(_idx <= _hashmap->_mask);
184  Node *node = _hashmap->_storage[_idx];
185  assert(node != nullptr);
186  assert(node != HASHMAP_DUMMY_NODE);
187  return node;
188  }
189 
190  public:
191  IteratorImpl() : _idx(0), _hashmap(nullptr) {}
192  template<class T>
193  IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
194 
195  NodeType &operator*() const { return *deref(); }
196  NodeType *operator->() const { return deref(); }
197 
198  bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
199  bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
200 
201  IteratorImpl &operator++() {
202  assert(_hashmap);
203  do {
204  _idx++;
205  } while (_idx <= _hashmap->_mask && (_hashmap->_storage[_idx] == nullptr || _hashmap->_storage[_idx] == HASHMAP_DUMMY_NODE));
206  if (_idx > _hashmap->_mask)
207  _idx = (size_type)-1;
208 
209  return *this;
210  }
211 
212  IteratorImpl operator++(int) {
213  IteratorImpl old = *this;
214  operator ++();
215  return old;
216  }
217  };
218 
219 public:
220  typedef IteratorImpl<Node> iterator;
221  typedef IteratorImpl<const Node> const_iterator;
222 
223  HashMap();
224  HashMap(const HM_t &map);
225  ~HashMap();
226 
227  HM_t &operator=(const HM_t &map) {
228  if (this == &map)
229  return *this;
230 
231  // Remove the previous content and ...
232  clear();
233  delete[] _storage;
234  // ... copy the new stuff.
235  assign(map);
236  return *this;
237  }
238 
239  bool contains(const Key &key) const;
240 
241  Val &operator[](const Key &key);
242  const Val &operator[](const Key &key) const;
243 
244  Val &getOrCreateVal(const Key &key);
245  Val &getVal(const Key &key);
246  const Val &getVal(const Key &key) const;
247  const Val &getValOrDefault(const Key &key) const;
248  const Val &getValOrDefault(const Key &key, const Val &defaultVal) const;
249  bool tryGetVal(const Key &key, Val &out) const;
250  void setVal(const Key &key, const Val &val);
251 
252  void clear(bool shrinkArray = 0);
253 
254  void erase(iterator entry);
255  void erase(const Key &key);
256 
257  size_type size() const { return _size; }
258 
259  iterator begin() {
260  // Find and return the first non-empty entry
261  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
262  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
263  return iterator(ctr, this);
264  }
265  return end();
266  }
267  iterator end() {
268  return iterator((size_type)-1, this);
269  }
270 
271  const_iterator begin() const {
272  // Find and return the first non-empty entry
273  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
274  if (_storage[ctr] && _storage[ctr] != HASHMAP_DUMMY_NODE)
275  return const_iterator(ctr, this);
276  }
277  return end();
278  }
279  const_iterator end() const {
280  return const_iterator((size_type)-1, this);
281  }
282 
283  iterator find(const Key &key) {
284  size_type ctr = lookup(key);
285  if (_storage[ctr])
286  return iterator(ctr, this);
287  return end();
288  }
289 
290  const_iterator find(const Key &key) const {
291  size_type ctr = lookup(key);
292  if (_storage[ctr])
293  return const_iterator(ctr, this);
294  return end();
295  }
296 
297  // TODO: insert() method?
299  bool empty() const {
300  return (_size == 0);
301  }
302 };
303 
304 template <class Key>
305 void NORETURN_PRE unknownKeyError(Key k) NORETURN_POST {
306  error("Unknown key");
307 }
308 
309 template<>
310 void NORETURN_PRE unknownKeyError(::Common::String key) NORETURN_POST;
311 template<>
312 void NORETURN_PRE unknownKeyError(signed char key) NORETURN_POST;
313 template<>
314 void NORETURN_PRE unknownKeyError(unsigned char key) NORETURN_POST;
315 template<>
316 void NORETURN_PRE unknownKeyError(short signed key) NORETURN_POST;
317 template<>
318 void NORETURN_PRE unknownKeyError(short unsigned key) NORETURN_POST;
319 template<>
320 void NORETURN_PRE unknownKeyError(long signed key) NORETURN_POST;
321 template<>
322 void NORETURN_PRE unknownKeyError(long unsigned key) NORETURN_POST;
323 template<>
324 void NORETURN_PRE unknownKeyError(signed int key) NORETURN_POST;
325 template<>
326 void NORETURN_PRE unknownKeyError(unsigned int key) NORETURN_POST;
327 template<>
328 void NORETURN_PRE unknownKeyError(long long signed key) NORETURN_POST;
329 template<>
330 void NORETURN_PRE unknownKeyError(long long unsigned key) NORETURN_POST;
331 template<>
332 void NORETURN_PRE unknownKeyError(void *key) NORETURN_POST;
333 template<>
334 void NORETURN_PRE unknownKeyError(const char *key) NORETURN_POST;
335 
336 //-------------------------------------------------------
337 // HashMap functions
338 
342 template<class Key, class Val, class HashFunc, class EqualFunc>
344  _mask = HASHMAP_MIN_CAPACITY - 1;
345  _storage = new Node *[HASHMAP_MIN_CAPACITY];
346  assert(_storage != nullptr);
347  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
348 
349  _size = 0;
350  _deleted = 0;
351 
352 #ifdef DEBUG_HASH_COLLISIONS
353  _collisions = 0;
354  _lookups = 0;
355  _dummyHits = 0;
356 #endif
357 }
358 
364 template<class Key, class Val, class HashFunc, class EqualFunc>
366  _defaultVal() {
367 #ifdef DEBUG_HASH_COLLISIONS
368  _collisions = 0;
369  _lookups = 0;
370  _dummyHits = 0;
371 #endif
372  assign(map);
373 }
374 
378 template<class Key, class Val, class HashFunc, class EqualFunc>
380  for (size_type ctr = 0; ctr <= _mask; ++ctr)
381  freeNode(_storage[ctr]);
382 
383  delete[] _storage;
384 #ifdef DEBUG_HASH_COLLISIONS
385  extern void updateHashCollisionStats(int, int, int, int, int);
386  updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask + 1, _size);
387 #endif
388 }
389 
397 template<class Key, class Val, class HashFunc, class EqualFunc>
399  _mask = map._mask;
400  _storage = new Node *[_mask + 1];
401  assert(_storage != nullptr);
402  memset(_storage, 0, (_mask + 1) * sizeof(Node *));
403 
404  // Simply clone the map given to us, one by one.
405  _size = 0;
406  _deleted = 0;
407  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
408  if (map._storage[ctr] == HASHMAP_DUMMY_NODE) {
409  _storage[ctr] = HASHMAP_DUMMY_NODE;
410  _deleted++;
411  } else if (map._storage[ctr] != nullptr) {
412  _storage[ctr] = allocNode(map._storage[ctr]->_key);
413  _storage[ctr]->_value = map._storage[ctr]->_value;
414  _size++;
415  }
416  }
417  // Perform a sanity check (to help track down hashmap corruption)
418  assert(_size == map._size);
419  assert(_deleted == map._deleted);
420 }
421 
426 template<class Key, class Val, class HashFunc, class EqualFunc>
428  for (size_type ctr = 0; ctr <= _mask; ++ctr) {
429  freeNode(_storage[ctr]);
430  _storage[ctr] = nullptr;
431  }
432 
433 #ifdef USE_HASHMAP_MEMORY_POOL
434  _nodePool.freeUnusedPages();
435 #endif
436 
437  if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
438  delete[] _storage;
439 
440  _mask = HASHMAP_MIN_CAPACITY - 1;
441  _storage = new Node *[HASHMAP_MIN_CAPACITY];
442  assert(_storage != nullptr);
443  memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
444  }
445 
446  _size = 0;
447  _deleted = 0;
448 }
449 
450 template<class Key, class Val, class HashFunc, class EqualFunc>
451 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(size_type newCapacity) {
452  assert(newCapacity > _mask + 1);
453 
454 #ifndef RELEASE_BUILD
455  const size_type old_size = _size;
456 #endif
457  const size_type old_mask = _mask;
458  Node **old_storage = _storage;
459 
460  // allocate a new array
461  _size = 0;
462  _deleted = 0;
463  _mask = newCapacity - 1;
464  _storage = new Node *[newCapacity];
465  assert(_storage != nullptr);
466  memset(_storage, 0, newCapacity * sizeof(Node *));
467 
468  // rehash all the old elements
469  for (size_type ctr = 0; ctr <= old_mask; ++ctr) {
470  if (old_storage[ctr] == nullptr || old_storage[ctr] == HASHMAP_DUMMY_NODE)
471  continue;
472 
473  // Insert the element from the old table into the new table.
474  // Since we know that no key exists twice in the old table, we
475  // can do this slightly better than by calling lookup, since we
476  // don't have to call _equal().
477  const size_type hash = _hash(old_storage[ctr]->_key);
478  size_type idx = hash & _mask;
479  for (size_type perturb = hash; _storage[idx] != nullptr && _storage[idx] != HASHMAP_DUMMY_NODE; perturb >>= HASHMAP_PERTURB_SHIFT) {
480  idx = (5 * idx + perturb + 1) & _mask;
481  }
482 
483  _storage[idx] = old_storage[ctr];
484  _size++;
485  }
486 
487 #ifndef RELEASE_BUILD
488  // Perform a sanity check: Old number of elements should match the new one!
489  // This check will fail if some previous operation corrupted this hashmap.
490  assert(_size == old_size);
491 #endif
492 
493  delete[] old_storage;
494 
495  return;
496 }
497 
498 template<class Key, class Val, class HashFunc, class EqualFunc>
499 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
500  const size_type hash = _hash(key);
501  size_type ctr = hash & _mask;
502  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
503  if (_storage[ctr] == nullptr)
504  break;
505  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
506 #ifdef DEBUG_HASH_COLLISIONS
507  _dummyHits++;
508 #endif
509  } else if (_equal(_storage[ctr]->_key, key))
510  break;
511 
512  ctr = (5 * ctr + perturb + 1) & _mask;
513 
514 #ifdef DEBUG_HASH_COLLISIONS
515  _collisions++;
516 #endif
517  }
518 
519 #ifdef DEBUG_HASH_COLLISIONS
520  _lookups++;
521  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
522  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
523  (const void *)this, _mask + 1, _size);
524 #endif
525 
526  return ctr;
527 }
528 
529 template<class Key, class Val, class HashFunc, class EqualFunc>
530 typename HashMap<Key, Val, HashFunc, EqualFunc>::size_type HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
531  const size_type hash = _hash(key);
532  size_type ctr = hash & _mask;
533  const size_type NONE_FOUND = _mask + 1;
534  size_type first_free = NONE_FOUND;
535  bool found = false;
536  for (size_type perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
537  if (_storage[ctr] == nullptr)
538  break;
539  if (_storage[ctr] == HASHMAP_DUMMY_NODE) {
540 #ifdef DEBUG_HASH_COLLISIONS
541  _dummyHits++;
542 #endif
543  if (first_free == NONE_FOUND)
544  first_free = ctr;
545  } else if (_equal(_storage[ctr]->_key, key)) {
546  found = true;
547  break;
548  }
549 
550  ctr = (5 * ctr + perturb + 1) & _mask;
551 
552 #ifdef DEBUG_HASH_COLLISIONS
553  _collisions++;
554 #endif
555  }
556 
557 #ifdef DEBUG_HASH_COLLISIONS
558  _lookups++;
559  debug("collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d",
560  _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
561  (const void *)this, _mask + 1, _size);
562 #endif
563 
564  if (!found && first_free != NONE_FOUND)
565  ctr = first_free;
566 
567  if (!found) {
568  if (_storage[ctr])
569  _deleted--;
570  _storage[ctr] = allocNode(key);
571  assert(_storage[ctr] != nullptr);
572  _size++;
573 
574  // Keep the load factor below a certain threshold.
575  // Deleted nodes are also counted
576  size_type capacity = _mask + 1;
577  if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
578  capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
579  capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
580  expandStorage(capacity);
581  ctr = lookup(key);
582  assert(_storage[ctr] != nullptr);
583  }
584  }
585 
586  return ctr;
587 }
588 
593 template<class Key, class Val, class HashFunc, class EqualFunc>
595  size_type ctr = lookup(key);
596  return (_storage[ctr] != nullptr);
597 }
598 
603 template<class Key, class Val, class HashFunc, class EqualFunc>
605  return getOrCreateVal(key);
606 }
607 
612 template<class Key, class Val, class HashFunc, class EqualFunc>
613 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
614  return getVal(key);
615 }
616 
621 template<class Key, class Val, class HashFunc, class EqualFunc>
623  size_type ctr = lookupAndCreateIfMissing(key);
624  assert(_storage[ctr] != nullptr);
625  return _storage[ctr]->_value;
626 }
627 
632 template<class Key, class Val, class HashFunc, class EqualFunc>
634  size_type ctr = lookup(key);
635  if (_storage[ctr] != nullptr)
636  return _storage[ctr]->_value;
637  else
638  // In the past getVal() and operator[] used to return the default value for this case.
639  // Clarifying the intent by using getValOrDefault() when we query a key that may not be
640  // present is a good idea, but we have a lot of legacy code that may need to be updated.
641  // So for now only returns an error in non-release builds. Once we are confident all the
642  // code has been updated to use the correct function we can remove the RELEASE_BUILD
643  // special case.
644 #ifdef RELEASE_BUILD
645  return _defaultVal;
646 #else
647  unknownKeyError(key);
648 #endif
649 }
650 
651 template<class Key, class Val, class HashFunc, class EqualFunc>
652 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
653  size_type ctr = lookup(key);
654  if (_storage[ctr] != nullptr)
655  return _storage[ctr]->_value;
656  else
657  // See comment in non-const getVal() above.
658 #ifdef RELEASE_BUILD
659  return _defaultVal;
660 #else
661  unknownKeyError(key);
662 #endif
663 }
664 
665 template<class Key, class Val, class HashFunc, class EqualFunc>
666 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key) const {
667  return getValOrDefault(key, _defaultVal);
668 }
669 
674 template<class Key, class Val, class HashFunc, class EqualFunc>
675 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getValOrDefault(const Key &key, const Val &defaultVal) const {
676  size_type ctr = lookup(key);
677  if (_storage[ctr] != nullptr)
678  return _storage[ctr]->_value;
679  else
680  return defaultVal;
681 }
682 
687 template<class Key, class Val, class HashFunc, class EqualFunc>
688 bool HashMap<Key, Val, HashFunc, EqualFunc>::tryGetVal(const Key &key, Val &out) const {
689  size_type ctr = lookup(key);
690  if (_storage[ctr] != nullptr) {
691  out = _storage[ctr]->_value;
692  return true;
693  } else {
694  return false;
695  }
696 }
697 
698 template<class Key, class Val, class HashFunc, class EqualFunc>
699 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
700  size_type ctr = lookupAndCreateIfMissing(key);
701  assert(_storage[ctr] != nullptr);
702  _storage[ctr]->_value = val;
703 }
704 
709 template<class Key, class Val, class HashFunc, class EqualFunc>
711  // Check whether we have a valid iterator
712  assert(entry._hashmap == this);
713  const size_type ctr = entry._idx;
714  assert(ctr <= _mask);
715  Node * const node = _storage[ctr];
716  assert(node != NULL);
717  assert(node != HASHMAP_DUMMY_NODE);
718 
719  // If we remove a key, we replace it with a dummy node.
720  freeNode(node);
721  _storage[ctr] = HASHMAP_DUMMY_NODE;
722  _size--;
723  _deleted++;
724 }
725 
730 template<class Key, class Val, class HashFunc, class EqualFunc>
732 
733  size_type ctr = lookup(key);
734  if (_storage[ctr] == nullptr)
735  return;
736 
737  // If we remove a key, we replace it with a dummy node.
738  freeNode(_storage[ctr]);
739  _storage[ctr] = HASHMAP_DUMMY_NODE;
740  _size--;
741  _deleted++;
742  return;
743 }
744 
745 #undef HASHMAP_DUMMY_NODE
746 
749 } // End of namespace Common
750 
751 #endif
void clear(bool shrinkArray=0)
Definition: hashmap.h:427
Definition: str.h:59
Val & getVal(const Key &key)
Definition: hashmap.h:633
Val & operator[](const Key &key)
Definition: hashmap.h:604
In find(In first, In last, const T &v)
Definition: algorithm.h:225
void debug(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
Definition: hashmap.h:85
void deleteChunk(T *ptr)
Definition: memorypool.h:145
Definition: algorithm.h:29
Definition: memorypool.h:139
bool empty() const
Definition: hashmap.h:299
bool tryGetVal(const Key &key, Val &out) const
Definition: hashmap.h:688
bool contains(const Key &key) const
Definition: hashmap.h:594
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
void erase(iterator entry)
Definition: hashmap.h:710
HashMap()
Definition: hashmap.h:343
~HashMap()
Definition: hashmap.h:379
Val & getOrCreateVal(const Key &key)
Definition: hashmap.h:622
Definition: lobject.h:332
Definition: hashmap.h:89