emilib
hash_set.hpp
1 // By Emil Ernerfeldt 2014-2016
2 // LICENSE:
3 // This software is dual-licensed to the public domain and under the following
4 // license: you are granted a perpetual, irrevocable license to copy, modify,
5 // publish, and distribute this file as you see fit.
6 
7 #pragma once
8 
9 #include <cstdlib> // malloc
10 #include <iterator>
11 #include <utility>
12 
13 #include <loguru.hpp>
14 
15 namespace emilib {
16 
18 template<typename T>
20 {
21  constexpr bool operator()(const T &lhs, const T &rhs) const
22  {
23  return lhs == rhs;
24  }
25 };
26 
28 template <typename KeyT, typename HashT = std::hash<KeyT>, typename EqT = HashSetEqualTo<KeyT>>
29 class HashSet
30 {
31 private:
33 
34 public:
35  using size_type = size_t;
36  using value_type = KeyT;
37  using reference = KeyT&;
38  using const_reference = const KeyT&;
39 
40  class iterator
41  {
42  public:
43  using iterator_category = std::forward_iterator_tag;
44  using difference_type = size_t;
45  using distance_type = size_t;
46  using value_type = KeyT;
47  using pointer = value_type*;
48  using reference = value_type&;
49 
50  iterator() { }
51 
52  iterator(MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket)
53  {
54  }
55 
56  iterator& operator++()
57  {
58  this->goto_next_element();
59  return *this;
60  }
61 
62  iterator operator++(int)
63  {
64  size_t old_index = _bucket;
65  this->goto_next_element();
66  return iterator(_set, old_index);
67  }
68 
69  reference operator*() const
70  {
71  return _set->_keys[_bucket];
72  }
73 
74  pointer operator->() const
75  {
76  return _set->_keys + _bucket;
77  }
78 
79  bool operator==(const iterator& rhs) const
80  {
81  DCHECK_EQ_F(_set, rhs._set);
82  return this->_bucket == rhs._bucket;
83  }
84 
85  bool operator!=(const iterator& rhs) const
86  {
87  DCHECK_EQ_F(_set, rhs._set);
88  return this->_bucket != rhs._bucket;
89  }
90 
91  private:
92  void goto_next_element()
93  {
94  DCHECK_LT_F(_bucket, _set->_num_buckets);
95  do {
96  _bucket++;
97  } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED);
98  }
99 
100  //private:
101  // friend class MyType;
102  public:
103  MyType* _set;
104  size_t _bucket;
105  };
106 
108  {
109  public:
110  using iterator_category = std::forward_iterator_tag;
111  using difference_type = size_t;
112  using distance_type = size_t;
113  using value_type = const KeyT;
114  using pointer = value_type*;
115  using reference = value_type&;
116 
117  const_iterator() { }
118 
119  const_iterator(iterator proto) : _set(proto._set), _bucket(proto._bucket)
120  {
121  }
122 
123  const_iterator(const MyType* hash_set, size_t bucket) : _set(hash_set), _bucket(bucket)
124  {
125  }
126 
127  const_iterator& operator++()
128  {
129  this->goto_next_element();
130  return *this;
131  }
132 
133  const_iterator operator++(int)
134  {
135  size_t old_index = _bucket;
136  this->goto_next_element();
137  return const_iterator(_set, old_index);
138  }
139 
140  reference operator*() const
141  {
142  return _set->_keys[_bucket];
143  }
144 
145  pointer operator->() const
146  {
147  return _set->_keys + _bucket;
148  }
149 
150  bool operator==(const const_iterator& rhs) const
151  {
152  DCHECK_EQ_F(_set, rhs._set);
153  return this->_bucket == rhs._bucket;
154  }
155 
156  bool operator!=(const const_iterator& rhs) const
157  {
158  DCHECK_EQ_F(_set, rhs._set);
159  return this->_bucket != rhs._bucket;
160  }
161 
162  private:
163  void goto_next_element()
164  {
165  DCHECK_LT_F(_bucket, _set->_num_buckets);
166  do {
167  _bucket++;
168  } while (_bucket < _set->_num_buckets && _set->_states[_bucket] != State::FILLED);
169  }
170 
171  //private:
172  // friend class MyType;
173  public:
174  const MyType* _set;
175  size_t _bucket;
176  };
177 
178  // ------------------------------------------------------------------------
179 
180  HashSet() = default;
181 
182  HashSet(const HashSet& other)
183  {
184  reserve(other.size());
185  insert(cbegin(other), cend(other));
186  }
187 
188  HashSet(HashSet&& other)
189  {
190  *this = std::move(other);
191  }
192 
193  HashSet& operator=(const HashSet& other)
194  {
195  clear();
196  reserve(other.size());
197  insert(cbegin(other), cend(other));
198  return *this;
199  }
200 
201  void operator=(HashSet&& other)
202  {
203  this->swap(other);
204  }
205 
206  ~HashSet()
207  {
208  for (size_t bucket=0; bucket<_num_buckets; ++bucket) {
209  if (_states[bucket] == State::FILLED) {
210  _keys[bucket].~KeyT();
211  }
212  }
213  free(_states);
214  free(_keys);
215  }
216 
217  void swap(HashSet& other)
218  {
219  std::swap(_hasher, other._hasher);
220  std::swap(_eq, other._eq);
221  std::swap(_states, other._states);
222  std::swap(_keys, other._keys);
223  std::swap(_num_buckets, other._num_buckets);
224  std::swap(_num_filled, other._num_filled);
225  std::swap(_max_probe_length, other._max_probe_length);
226  std::swap(_mask, other._mask);
227  }
228 
229  // -------------------------------------------------------------
230 
231  iterator begin()
232  {
233  size_t bucket = 0;
234  while (bucket<_num_buckets && _states[bucket] != State::FILLED) {
235  ++bucket;
236  }
237  return iterator(this, bucket);
238  }
239 
240  const_iterator begin() const
241  {
242  size_t bucket = 0;
243  while (bucket<_num_buckets && _states[bucket] != State::FILLED) {
244  ++bucket;
245  }
246  return const_iterator(this, bucket);
247  }
248 
249  iterator end()
250  { return iterator(this, _num_buckets); }
251 
252  const_iterator end() const
253  { return const_iterator(this, _num_buckets); }
254 
255  size_t size() const
256  {
257  return _num_filled;
258  }
259 
260  bool empty() const
261  {
262  return _num_filled==0;
263  }
264 
265  size_t bucket_count() const
266  {
267  return _num_buckets;
268  }
269 
270  // ------------------------------------------------------------
271 
272  iterator find(const KeyT& key)
273  {
274  auto bucket = this->find_filled_bucket(key);
275  if (bucket == (size_t)-1) {
276  return this->end();
277  }
278  return iterator(this, bucket);
279  }
280 
281  const_iterator find(const KeyT& key) const
282  {
283  auto bucket = this->find_filled_bucket(key);
284  if (bucket == (size_t)-1) {
285  return this->end();
286  }
287  return const_iterator(this, bucket);
288  }
289 
290  bool contains(const KeyT& k) const
291  {
292  return find_filled_bucket(k) != (size_t)-1;
293  }
294 
295  size_t count(const KeyT& k) const
296  {
297  return find_filled_bucket(k) != (size_t)-1 ? 1 : 0;
298  }
299 
300  // -----------------------------------------------------
301 
306  std::pair<iterator, bool> insert(const KeyT& key)
307  {
308  check_expand_need();
309 
310  auto bucket = find_or_allocate(key);
311 
312  if (_states[bucket] == State::FILLED) {
313  return { iterator(this, bucket), false };
314  } else {
315  _states[bucket] = State::FILLED;
316  new(_keys + bucket) KeyT(key);
317  _num_filled++;
318  return { iterator(this, bucket), true };
319  }
320  }
321 
326  std::pair<iterator, bool> insert(KeyT&& key)
327  {
328  check_expand_need();
329 
330  auto bucket = find_or_allocate(key);
331 
332  if (_states[bucket] == State::FILLED) {
333  return { iterator(this, bucket), false };
334  } else {
335  _states[bucket] = State::FILLED;
336  new(_keys + bucket) KeyT(std::move(key));
337  _num_filled++;
338  return { iterator(this, bucket), true };
339  }
340  }
341 
342  template<class... Args>
343  std::pair<iterator, bool> emplace(Args&&... args)
344  {
345  return insert(KeyT(std::forward<Args>(args)...));
346  }
347 
348  void insert(const_iterator begin, const_iterator end)
349  {
350  for (; begin != end; ++begin) {
351  insert(*begin);
352  }
353  }
354 
356  void insert_unique(KeyT key)
357  {
358  DCHECK_F(!contains(key));
359  check_expand_need();
360  auto bucket = find_empty_bucket(key);
361  _states[bucket] = State::FILLED;
362  new(_keys + bucket) KeyT(std::move(key));
363  _num_filled++;
364  }
365 
366  // -------------------------------------------------------
367 
370  bool erase(const KeyT& key)
371  {
372  auto bucket = find_filled_bucket(key);
373  if (bucket != (size_t)-1) {
374  _states[bucket] = State::ACTIVE;
375  _keys[bucket].~KeyT();
376  _num_filled -= 1;
377  return true;
378  } else {
379  return false;
380  }
381  }
382 
386  {
387  DCHECK_EQ_F(it._set, this);
388  DCHECK_LT_F(it._bucket, _num_buckets);
389  _states[it._bucket] = State::ACTIVE;
390  _keys[it._bucket].~KeyT();
391  _num_filled -= 1;
392  return ++it;
393  }
394 
396  void clear()
397  {
398  for (size_t bucket=0; bucket<_num_buckets; ++bucket) {
399  if (_states[bucket] == State::FILLED) {
400  _states[bucket] = State::INACTIVE;
401  _keys[bucket].~KeyT();
402  }
403  }
404  _num_filled = 0;
405  _max_probe_length = -1;
406  }
407 
409  void reserve(size_t num_elems)
410  {
411  size_t required_buckets = num_elems + num_elems/2 + 1;
412  if (required_buckets <= _num_buckets) {
413  return;
414  }
415  size_t num_buckets = 4;
416  while (num_buckets < required_buckets) { num_buckets *= 2; }
417 
418  auto new_states = (State*)malloc(num_buckets * sizeof(State));
419  auto new_keys = (KeyT*)malloc(num_buckets * sizeof(KeyT));
420 
421  if (!new_states || !new_keys) {
422  free(new_states);
423  free(new_keys);
424  throw std::bad_alloc();
425  }
426 
427  // auto old_num_filled = _num_filled;
428  auto old_num_buckets = _num_buckets;
429  auto old_states = _states;
430  auto old_keys = _keys;
431 
432  _num_filled = 0;
433  _num_buckets = num_buckets;
434  _mask = _num_buckets - 1;
435  _states = new_states;
436  _keys = new_keys;
437 
438  std::fill_n(_states, num_buckets, State::INACTIVE);
439 
440  _max_probe_length = -1;
441 
442  for (size_t src_bucket=0; src_bucket<old_num_buckets; src_bucket++) {
443  if (old_states[src_bucket] == State::FILLED) {
444  auto& src = old_keys[src_bucket];
445 
446  auto dst_bucket = find_empty_bucket(src);
447  DCHECK_NE_F(dst_bucket, (size_t)-1);
448  DCHECK_NE_F(_states[dst_bucket], State::FILLED);
449  _states[dst_bucket] = State::FILLED;
450  new(_keys + dst_bucket) KeyT(std::move(src));
451  _num_filled += 1;
452 
453  src.~KeyT();
454  }
455  }
456 
457  // DCHECK_EQ_F(old_num_filled, _num_filled);
458 
459  free(old_states);
460  free(old_keys);
461  }
462 
463 private:
464  // Can we fit another element?
465  void check_expand_need()
466  {
467  reserve(_num_filled + 1);
468  }
469 
470  // Find the bucket with this key, or return (size_t)-1
471  size_t find_filled_bucket(const KeyT& key) const
472  {
473  if (empty()) { return (size_t)-1; } // Optimization
474 
475  auto hash_value = _hasher(key);
476  for (int offset=0; offset<=_max_probe_length; ++offset) {
477  auto bucket = (hash_value + offset) & _mask;
478  if (_states[bucket] == State::FILLED) {
479  if (_eq(_keys[bucket], key)) {
480  return bucket;
481  }
482  } else if (_states[bucket] == State::INACTIVE) {
483  return (size_t)-1; // End of the chain!
484  }
485  }
486  return (size_t)-1;
487  }
488 
489  // Find the bucket with this key, or return a good empty bucket to place the key in.
490  // In the latter case, the bucket is expected to be filled.
491  size_t find_or_allocate(const KeyT& key)
492  {
493  auto hash_value = _hasher(key);
494  size_t hole = (size_t)-1;
495  int offset=0;
496  for (; offset<=_max_probe_length; ++offset) {
497  auto bucket = (hash_value + offset) & _mask;
498 
499  if (_states[bucket] == State::FILLED) {
500  if (_eq(_keys[bucket], key)) {
501  return bucket;
502  }
503  } else if (_states[bucket] == State::INACTIVE) {
504  return bucket;
505  } else {
506  // ACTIVE: keep searching
507  if (hole == (size_t)-1) {
508  hole = bucket;
509  }
510  }
511  }
512 
513  // No key found - but maybe a hole for it
514  DCHECK_EQ_F(offset, _max_probe_length+1);
515 
516  if (hole != (size_t)-1) {
517  return hole;
518  }
519 
520  // No hole found within _max_probe_length
521  for (; ; ++offset) {
522  auto bucket = (hash_value + offset) & _mask;
523 
524  if (_states[bucket] != State::FILLED) {
525  _max_probe_length = offset;
526  return bucket;
527  }
528  }
529  }
530 
531  // key is not in this map. Find a place to put it.
532  size_t find_empty_bucket(const KeyT& key)
533  {
534  auto hash_value = _hasher(key);
535  for (int offset=0; ; ++offset) {
536  auto bucket = (hash_value + offset) & _mask;
537  if (_states[bucket] != State::FILLED) {
538  if (offset > _max_probe_length) {
539  _max_probe_length = offset;
540  }
541  return bucket;
542  }
543  }
544  }
545 
546 private:
547  enum class State : uint8_t
548  {
549  INACTIVE, // Never been touched
550  ACTIVE, // Is inside a search-chain, but is empty
551  FILLED // Is set with key/value
552  };
553 
554  HashT _hasher;
555  EqT _eq;
556  State* _states = nullptr;
557  KeyT* _keys = nullptr;
558  size_t _num_buckets = 0;
559  size_t _num_filled = 0;
560  int _max_probe_length = -1; // Our longest bucket-brigade is this long. ONLY when we have zero elements is this ever negative (-1).
561  size_t _mask = 0; // _num_buckets minus one
562 };
563 
564 } // namespace emilib
Definition: hash_set.hpp:40
void insert_unique(KeyT key)
Same as above, but contains(key) MUST be false.
Definition: hash_set.hpp:356
iterator erase(iterator it)
Definition: hash_set.hpp:385
std::pair< iterator, bool > insert(KeyT &&key)
Definition: hash_set.hpp:326
A cache-friendly hash set with open addressing, linear probing and power-of-two capacity.
Definition: hash_set.hpp:29
void reserve(size_t num_elems)
Make room for this many elements.
Definition: hash_set.hpp:409
Definition: hash_set.hpp:107
std::pair< iterator, bool > insert(const KeyT &key)
Definition: hash_set.hpp:306
bool erase(const KeyT &key)
Definition: hash_set.hpp:370
like std::equal_to but no need to #include <functional>
Definition: hash_set.hpp:19
Definition: coroutine.hpp:18
void clear()
Remove all elements, keeping full capacity.
Definition: hash_set.hpp:396