emilib
hash_map.hpp
1 // By Emil Ernerfeldt 2014-2016
2 // LICENSE:
3 // This software is dual-licensed to the public domain and under the following
4 // license: you are granted a perpetual, irrevocable license to copy, modify,
5 // publish, and distribute this file as you see fit.
6 
7 #pragma once
8 
9 #include <cstdlib>
10 #include <iterator>
11 #include <utility>
12 
13 #include <loguru.hpp>
14 
15 namespace emilib {
16 
18 template<typename T>
20 {
21  constexpr bool operator()(const T &lhs, const T &rhs) const
22  {
23  return lhs == rhs;
24  }
25 };
26 
28 template <typename KeyT, typename ValueT, typename HashT = std::hash<KeyT>, typename EqT = HashMapEqualTo<KeyT>>
29 class HashMap
30 {
31 private:
33 
34  using PairT = std::pair<KeyT, ValueT>;
35 public:
36  using size_type = size_t;
37  using value_type = PairT;
38  using reference = PairT&;
39  using const_reference = const PairT&;
40 
41  class iterator
42  {
43  public:
44  using iterator_category = std::forward_iterator_tag;
45  using difference_type = size_t;
46  using distance_type = size_t;
47  using value_type = std::pair<KeyT, ValueT>;
48  using pointer = value_type*;
49  using reference = value_type&;
50 
51  iterator() { }
52 
53  iterator(MyType* hash_map, size_t bucket) : _map(hash_map), _bucket(bucket)
54  {
55  }
56 
57  iterator& operator++()
58  {
59  this->goto_next_element();
60  return *this;
61  }
62 
63  iterator operator++(int)
64  {
65  size_t old_index = _bucket;
66  this->goto_next_element();
67  return iterator(_map, old_index);
68  }
69 
70  reference operator*() const
71  {
72  return _map->_pairs[_bucket];
73  }
74 
75  pointer operator->() const
76  {
77  return _map->_pairs + _bucket;
78  }
79 
80  bool operator==(const iterator& rhs) const
81  {
82  DCHECK_EQ_F(_map, rhs._map);
83  return this->_bucket == rhs._bucket;
84  }
85 
86  bool operator!=(const iterator& rhs) const
87  {
88  DCHECK_EQ_F(_map, rhs._map);
89  return this->_bucket != rhs._bucket;
90  }
91 
92  private:
93  void goto_next_element()
94  {
95  DCHECK_LT_F(_bucket, _map->_num_buckets);
96  do {
97  _bucket++;
98  } while (_bucket < _map->_num_buckets && _map->_states[_bucket] != State::FILLED);
99  }
100 
101  //private:
102  // friend class MyType;
103  public:
104  MyType* _map;
105  size_t _bucket;
106  };
107 
109  {
110  public:
111  using iterator_category = std::forward_iterator_tag;
112  using difference_type = size_t;
113  using distance_type = size_t;
114  using value_type = const std::pair<KeyT, ValueT>;
115  using pointer = value_type*;
116  using reference = value_type&;
117 
118  const_iterator() { }
119 
120  const_iterator(iterator proto) : _map(proto._map), _bucket(proto._bucket)
121  {
122  }
123 
124  const_iterator(const MyType* hash_map, size_t bucket) : _map(hash_map), _bucket(bucket)
125  {
126  }
127 
128  const_iterator& operator++()
129  {
130  this->goto_next_element();
131  return *this;
132  }
133 
134  const_iterator operator++(int)
135  {
136  size_t old_index = _bucket;
137  this->goto_next_element();
138  return const_iterator(_map, old_index);
139  }
140 
141  reference operator*() const
142  {
143  return _map->_pairs[_bucket];
144  }
145 
146  pointer operator->() const
147  {
148  return _map->_pairs + _bucket;
149  }
150 
151  bool operator==(const const_iterator& rhs) const
152  {
153  DCHECK_EQ_F(_map, rhs._map);
154  return this->_bucket == rhs._bucket;
155  }
156 
157  bool operator!=(const const_iterator& rhs) const
158  {
159  DCHECK_EQ_F(_map, rhs._map);
160  return this->_bucket != rhs._bucket;
161  }
162 
163  private:
164  void goto_next_element()
165  {
166  DCHECK_LT_F(_bucket, _map->_num_buckets);
167  do {
168  _bucket++;
169  } while (_bucket < _map->_num_buckets && _map->_states[_bucket] != State::FILLED);
170  }
171 
172  //private:
173  // friend class MyType;
174  public:
175  const MyType* _map;
176  size_t _bucket;
177  };
178 
179  // ------------------------------------------------------------------------
180 
181  HashMap() = default;
182 
183  HashMap(const HashMap& other)
184  {
185  reserve(other.size());
186  insert(cbegin(other), cend(other));
187  }
188 
189  HashMap(HashMap&& other)
190  {
191  *this = std::move(other);
192  }
193 
194  HashMap& operator=(const HashMap& other)
195  {
196  clear();
197  reserve(other.size());
198  insert(cbegin(other), cend(other));
199  return *this;
200  }
201 
202  void operator=(HashMap&& other)
203  {
204  this->swap(other);
205  }
206 
207  ~HashMap()
208  {
209  for (size_t bucket=0; bucket<_num_buckets; ++bucket) {
210  if (_states[bucket] == State::FILLED) {
211  _pairs[bucket].~PairT();
212  }
213  }
214  free(_states);
215  free(_pairs);
216  }
217 
218  void swap(HashMap& other)
219  {
220  std::swap(_hasher, other._hasher);
221  std::swap(_eq, other._eq);
222  std::swap(_states, other._states);
223  std::swap(_pairs, other._pairs);
224  std::swap(_num_buckets, other._num_buckets);
225  std::swap(_num_filled, other._num_filled);
226  std::swap(_max_probe_length, other._max_probe_length);
227  std::swap(_mask, other._mask);
228  }
229 
230  // -------------------------------------------------------------
231 
232  iterator begin()
233  {
234  size_t bucket = 0;
235  while (bucket<_num_buckets && _states[bucket] != State::FILLED) {
236  ++bucket;
237  }
238  return iterator(this, bucket);
239  }
240 
241  const_iterator begin() const
242  {
243  size_t bucket = 0;
244  while (bucket<_num_buckets && _states[bucket] != State::FILLED) {
245  ++bucket;
246  }
247  return const_iterator(this, bucket);
248  }
249 
250  iterator end()
251  { return iterator(this, _num_buckets); }
252 
253  const_iterator end() const
254  { return const_iterator(this, _num_buckets); }
255 
256  size_t size() const
257  {
258  return _num_filled;
259  }
260 
261  bool empty() const
262  {
263  return _num_filled==0;
264  }
265 
266  // ------------------------------------------------------------
267 
268  iterator find(const KeyT& key)
269  {
270  auto bucket = this->find_filled_bucket(key);
271  if (bucket == (size_t)-1) {
272  return this->end();
273  }
274  return iterator(this, bucket);
275  }
276 
277  const_iterator find(const KeyT& key) const
278  {
279  auto bucket = this->find_filled_bucket(key);
280  if (bucket == (size_t)-1)
281  {
282  return this->end();
283  }
284  return const_iterator(this, bucket);
285  }
286 
287  bool contains(const KeyT& k) const
288  {
289  return find_filled_bucket(k) != (size_t)-1;
290  }
291 
292  size_t count(const KeyT& k) const
293  {
294  return find_filled_bucket(k) != (size_t)-1 ? 1 : 0;
295  }
296 
298  ValueT* try_get(const KeyT& k)
299  {
300  auto bucket = find_filled_bucket(k);
301  if (bucket != (size_t)-1) {
302  return &_pairs[bucket].second;
303  } else {
304  return nullptr;
305  }
306  }
307 
309  const ValueT* try_get(const KeyT& k) const
310  {
311  auto bucket = find_filled_bucket(k);
312  if (bucket != (size_t)-1) {
313  return &_pairs[bucket].second;
314  } else {
315  return nullptr;
316  }
317  }
318 
320  const ValueT get_or_return_default(const KeyT& k) const
321  {
322  const ValueT* ret = try_get(k);
323  if (ret) {
324  return *ret;
325  } else {
326  return ValueT();
327  }
328  }
329 
330  // -----------------------------------------------------
331 
335  std::pair<iterator, bool> insert(const KeyT& key, const ValueT& value)
336  {
337  check_expand_need();
338 
339  auto bucket = find_or_allocate(key);
340 
341  if (_states[bucket] == State::FILLED) {
342  return { iterator(this, bucket), false };
343  } else {
344  _states[bucket] = State::FILLED;
345  new(_pairs + bucket) PairT(key, value);
346  _num_filled++;
347  return { iterator(this, bucket), true };
348  }
349  }
350 
351  std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT>& p)
352  {
353  return insert(p.first, p.second);
354  }
355 
356  void insert(const_iterator begin, const_iterator end)
357  {
358  for (; begin != end; ++begin) {
359  insert(begin->first, begin->second);
360  }
361  }
362 
364  void insert_unique(KeyT&& key, ValueT&& value)
365  {
366  DCHECK_F(!contains(key));
367  check_expand_need();
368  auto bucket = find_empty_bucket(key);
369  _states[bucket] = State::FILLED;
370  new(_pairs + bucket) PairT(std::move(key), std::move(value));
371  _num_filled++;
372  }
373 
374  void insert_unique(std::pair<KeyT, ValueT>&& p)
375  {
376  insert_unique(std::move(p.first), std::move(p.second));
377  }
378 
380  ValueT set_get(const KeyT& key, const ValueT& new_value)
381  {
382  check_expand_need();
383 
384  auto bucket = find_or_allocate(key);
385 
386  // Check if inserting a new value rather than overwriting an old entry
387  if (_states[bucket] == State::FILLED) {
388  ValueT old_value = _pairs[bucket].second;
389  _pairs[bucket] = new_value.second;
390  return old_value;
391  } else {
392  _states[bucket] = State::FILLED;
393  new(_pairs + bucket) PairT(key, new_value);
394  _num_filled++;
395  return ValueT();
396  }
397  }
398 
400  ValueT& operator[](const KeyT& key)
401  {
402  check_expand_need();
403 
404  auto bucket = find_or_allocate(key);
405 
406  /* Check if inserting a new value rather than overwriting an old entry */
407  if (_states[bucket] != State::FILLED) {
408  _states[bucket] = State::FILLED;
409  new(_pairs + bucket) PairT(key, ValueT());
410  _num_filled++;
411  }
412 
413  return _pairs[bucket].second;
414  }
415 
416  // -------------------------------------------------------
417 
420  bool erase(const KeyT& key)
421  {
422  auto bucket = find_filled_bucket(key);
423  if (bucket != (size_t)-1) {
424  _states[bucket] = State::ACTIVE;
425  _pairs[bucket].~PairT();
426  _num_filled -= 1;
427  return true;
428  } else {
429  return false;
430  }
431  }
432 
436  {
437  DCHECK_EQ_F(it._map, this);
438  DCHECK_LT_F(it._bucket, _num_buckets);
439  _states[it._bucket] = State::ACTIVE;
440  _pairs[it._bucket].~PairT();
441  _num_filled -= 1;
442  return ++it;
443  }
444 
446  void clear()
447  {
448  for (size_t bucket=0; bucket<_num_buckets; ++bucket) {
449  if (_states[bucket] == State::FILLED) {
450  _states[bucket] = State::INACTIVE;
451  _pairs[bucket].~PairT();
452  }
453  }
454  _num_filled = 0;
455  _max_probe_length = -1;
456  }
457 
459  void reserve(size_t num_elems)
460  {
461  size_t required_buckets = num_elems + num_elems/2 + 1;
462  if (required_buckets <= _num_buckets) {
463  return;
464  }
465  size_t num_buckets = 4;
466  while (num_buckets < required_buckets) { num_buckets *= 2; }
467 
468  auto new_states = (State*)malloc(num_buckets * sizeof(State));
469  auto new_pairs = (PairT*)malloc(num_buckets * sizeof(PairT));
470 
471  if (!new_states || !new_pairs) {
472  free(new_states);
473  free(new_pairs);
474  throw std::bad_alloc();
475  }
476 
477  //auto old_num_filled = _num_filled;
478  auto old_num_buckets = _num_buckets;
479  auto old_states = _states;
480  auto old_pairs = _pairs;
481 
482  _num_filled = 0;
483  _num_buckets = num_buckets;
484  _mask = _num_buckets - 1;
485  _states = new_states;
486  _pairs = new_pairs;
487 
488  std::fill_n(_states, num_buckets, State::INACTIVE);
489 
490  _max_probe_length = -1;
491 
492  for (size_t src_bucket=0; src_bucket<old_num_buckets; src_bucket++) {
493  if (old_states[src_bucket] == State::FILLED) {
494  auto& src_pair = old_pairs[src_bucket];
495 
496  auto dst_bucket = find_empty_bucket(src_pair.first);
497  DCHECK_NE_F(dst_bucket, (size_t)-1);
498  DCHECK_NE_F(_states[dst_bucket], State::FILLED);
499  _states[dst_bucket] = State::FILLED;
500  new(_pairs + dst_bucket) PairT(std::move(src_pair));
501  _num_filled += 1;
502 
503  src_pair.~PairT();
504  }
505  }
506 
507  //DCHECK_EQ_F(old_num_filled, _num_filled);
508 
509  free(old_states);
510  free(old_pairs);
511  }
512 
513 private:
514  // Can we fit another element?
515  void check_expand_need()
516  {
517  reserve(_num_filled + 1);
518  }
519 
520  // Find the bucket with this key, or return (size_t)-1
521  size_t find_filled_bucket(const KeyT& key) const
522  {
523  if (empty()) { return (size_t)-1; } // Optimization
524 
525  auto hash_value = _hasher(key);
526  for (int offset=0; offset<=_max_probe_length; ++offset) {
527  auto bucket = (hash_value + offset) & _mask;
528  if (_states[bucket] == State::FILLED) {
529  if (_eq(_pairs[bucket].first, key)) {
530  return bucket;
531  }
532  } else if (_states[bucket] == State::INACTIVE) {
533  return (size_t)-1; // End of the chain!
534  }
535  }
536  return (size_t)-1;
537  }
538 
539  // Find the bucket with this key, or return a good empty bucket to place the key in.
540  // In the latter case, the bucket is expected to be filled.
541  size_t find_or_allocate(const KeyT& key)
542  {
543  auto hash_value = _hasher(key);
544  size_t hole = (size_t)-1;
545  int offset=0;
546  for (; offset<=_max_probe_length; ++offset) {
547  auto bucket = (hash_value + offset) & _mask;
548 
549  if (_states[bucket] == State::FILLED) {
550  if (_eq(_pairs[bucket].first, key)) {
551  return bucket;
552  }
553  } else if (_states[bucket] == State::INACTIVE) {
554  return bucket;
555  } else {
556  // ACTIVE: keep searching
557  if (hole == (size_t)-1) {
558  hole = bucket;
559  }
560  }
561  }
562 
563  // No key found - but maybe a hole for it
564 
565  DCHECK_EQ_F(offset, _max_probe_length+1);
566 
567  if (hole != (size_t)-1) {
568  return hole;
569  }
570 
571  // No hole found within _max_probe_length
572  for (; ; ++offset) {
573  auto bucket = (hash_value + offset) & _mask;
574 
575  if (_states[bucket] != State::FILLED) {
576  _max_probe_length = offset;
577  return bucket;
578  }
579  }
580  }
581 
582  // key is not in this map. Find a place to put it.
583  size_t find_empty_bucket(const KeyT& key)
584  {
585  auto hash_value = _hasher(key);
586  for (int offset=0; ; ++offset) {
587  auto bucket = (hash_value + offset) & _mask;
588  if (_states[bucket] != State::FILLED) {
589  if (offset > _max_probe_length) {
590  _max_probe_length = offset;
591  }
592  return bucket;
593  }
594  }
595  }
596 
597 private:
598  enum class State : uint8_t
599  {
600  INACTIVE, // Never been touched
601  ACTIVE, // Is inside a search-chain, but is empty
602  FILLED // Is set with key/value
603  };
604 
605  HashT _hasher;
606  EqT _eq;
607  State* _states = nullptr;
608  PairT* _pairs = nullptr;
609  size_t _num_buckets = 0;
610  size_t _num_filled = 0;
611  int _max_probe_length = -1; // Our longest bucket-brigade is this long. ONLY when we have zero elements is this ever negative (-1).
612  size_t _mask = 0; // _num_buckets minus one
613 };
614 
615 } // namespace emilib
bool erase(const KeyT &key)
Definition: hash_map.hpp:420
const ValueT * try_get(const KeyT &k) const
Const version of the above.
Definition: hash_map.hpp:309
ValueT set_get(const KeyT &key, const ValueT &new_value)
Return the old value or ValueT() if it didn&#39;t exist.
Definition: hash_map.hpp:380
iterator erase(iterator it)
Definition: hash_map.hpp:435
void insert_unique(KeyT &&key, ValueT &&value)
Same as above, but contains(key) MUST be false.
Definition: hash_map.hpp:364
like std::equal_to but no need to #include <functional>
Definition: hash_map.hpp:19
ValueT & operator[](const KeyT &key)
Like std::map<KeyT,ValueT>::operator[].
Definition: hash_map.hpp:400
void clear()
Remove all elements, keeping full capacity.
Definition: hash_map.hpp:446
void reserve(size_t num_elems)
Make room for this many elements.
Definition: hash_map.hpp:459
std::pair< iterator, bool > insert(const KeyT &key, const ValueT &value)
Definition: hash_map.hpp:335
ValueT * try_get(const KeyT &k)
Returns the matching ValueT or nullptr if k isn&#39;t found.
Definition: hash_map.hpp:298
const ValueT get_or_return_default(const KeyT &k) const
Convenience function.
Definition: hash_map.hpp:320
Definition: hash_map.hpp:41
A cache-friendly hash table with open addressing, linear probing and power-of-two capacity...
Definition: hash_map.hpp:29
Definition: hash_map.hpp:108
Definition: coroutine.hpp:18