Roc Toolkit internal modules
Roc Toolkit: real-time audio streaming
Loading...
Searching...
No Matches
hashmap.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Roc Streaming authors
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 */
8
9//! @file roc_core/hashmap.h
10//! @brief Intrusive hash table.
11
12#ifndef ROC_CORE_HASHMAP_H_
13#define ROC_CORE_HASHMAP_H_
14
17#include "roc_core/hashsum.h"
18#include "roc_core/iallocator.h"
22#include "roc_core/panic.h"
23#include "roc_core/stddefs.h"
24
25namespace roc {
26namespace core {
27
28//! Intrusive hash table.
29//!
30//! Characteristics:
31//! 1) Intrusive. Hash table nodes are stored directly in elements. No allocations
32//! are needed to insert a node. Allocator is used only to allocate an array
33//! of buckets.
34//! 2) Collision-chaining. Implemented as an array of buckets, where a bucket is
35//! the head of a doubly-linked lists of bucket elements.
36//! 3) Controllable allocations. Allocations and deallocations are performed only
37//! when the hash table is explicitly growed. All other operations don't touch
38//! allocator.
39//! 4) Zero allocations for small hash tables. A fixed number of buckets can be
40//! embedded directly into hash table object.
41//! 5) Incremental rehashing. After hash table growth, rehashing is performed
42//! incrementally when inserting and removing elements. The slower hash table
43//! size growth is, the less overhead rehashing adds to each operation.
44//!
45//! Incremental rehashing technique is inspired by Go's map implementation, though
46//! there are differeces. Load factor value is from Java's Hashmap implementation.
47//! Prime numbers for sizes are from https://planetmath.org/goodhashtableprimes.
48//!
49//! @tparam T defines object type, it should inherit HashmapNode and additionally
50//! implement three methods:
51//!
52//! @code
53//! // compute key hash
54//! static core::hashsum_t key_hash(Key key);
55//!
56//! // compare two keys for equality
57//! static bool key_equal(Key key1, Key key2);
58//!
59//! // get object key
60//! Key key() const;
61//! @endcode
62//!
63//! "Key" can be any type. Hashmap doesn't use it directly. It is never copied and
64//! is always accessed via the three methods above. The hash is computed for a key
65//! only once when an object is inserted into hash table.
66//!
67//! @tparam EmbeddedCapacity defines the capacity embedded directly into Hashmap.
68//! It is used instead of dynamic memory while the number of elements is smaller
69//! than this capacity. The actual object size occupied to provide the requested
70//! capacity is implementation defined.
71//!
72//! @tparam OwnershipPolicy defines ownership policy which is used to acquire an element
73//! ownership when it's added to the hashmap and release ownership when it's removed
74//! from the hashmap.
75template <class T,
76 size_t EmbeddedCapacity = 0,
77 template <class TT> class OwnershipPolicy = RefCountedOwnership>
78class Hashmap : public NonCopyable<> {
79public:
80 //! Pointer type.
81 //! @remarks
82 //! either raw or smart pointer depending on the ownership policy.
83 typedef typename OwnershipPolicy<T>::Pointer Pointer;
84
85 //! Initialize empty hashmap.
86 Hashmap(IAllocator& allocator)
87 : curr_buckets_(NULL)
88 , n_curr_buckets_(0)
89 , prev_buckets_(NULL)
90 , n_prev_buckets_(0)
91 , size_(0)
92 , rehash_pos_(0)
93 , rehash_remain_nodes_(0)
94 , allocator_(allocator) {
95 if (EmbeddedCapacity != 0) {
96 if (!realloc_buckets_(NumEmbeddedBuckets)) {
97 roc_panic("hashmap: initialization failed");
98 }
99 }
100 }
101
102 //! Release ownership of all elements.
104 release_bucket_array_(curr_buckets_, n_curr_buckets_);
105 release_bucket_array_(prev_buckets_, n_prev_buckets_);
106
107 dealloc_buckets_();
108 }
109
110 //! Get maximum number of elements that can be added to hashmap before
111 //! grow() should be called.
112 size_t capacity() const {
113 return buckets_capacity_(n_curr_buckets_);
114 }
115
116 //! Get number of elements added to hashmap.
117 size_t size() const {
118 return size_;
119 }
120
121 //! Check if element belongs to hashmap.
122 //!
123 //! @note
124 //! - has O(1) complexity
125 //! - doesn't compute key hashes
126 bool contains(const T& element) const {
127 const HashmapNode::HashmapNodeData* node = element.hashmap_node_data();
128
129 if (member_of_bucket_array_(curr_buckets_, n_curr_buckets_, node)) {
130 return true;
131 }
132
133 if (member_of_bucket_array_(prev_buckets_, n_prev_buckets_, node)) {
134 return true;
135 }
136
137 return false;
138 }
139
140 //! Find element in the hashmap by key.
141 //!
142 //! @returns
143 //! Pointer to the element with given key or NULL if it's not found.
144 //!
145 //! @note
146 //! - has O(1) complexity in average and O(n) in the worst case
147 //! - computes key hash
148 //!
149 //! @note
150 //! The worst case is achieved when the hash function produces many collisions.
151 template <class Key> Pointer find(const Key& key) const {
152 const hashsum_t hash = T::key_hash(key);
153
154 return find_node_(hash, key);
155 }
156
157 //! Insert element into hashmap.
158 //!
159 //! @remarks
160 //! - acquires ownership of @p element
161 //!
162 //! @pre
163 //! - hashmap size() should be smaller than hashmap capacity()
164 //! - @p element should not be member of any hashmap
165 //! - hashmap shouldn't have an element with the same key
166 //!
167 //! @note
168 //! - has O(1) complexity in average and O(n) in the worst case
169 //! - computes key hash
170 //! - doesn't make allocations or deallocations
171 //! - proceedes lazy rehashing
172 //!
173 //! @note
174 //! Insertion speed is higher when insert to remove ratio is close to one or lower,
175 //! and slows down when it becomes higher than one. The slow down is caused by
176 //! the incremental rehashing algorithm.
177 void insert(T& element) {
178 if (size_ >= buckets_capacity_(n_curr_buckets_)) {
179 roc_panic(
180 "hashmap: attempt to insert into full hashmap before calling grow()");
181 }
182
183 HashmapNode::HashmapNodeData* node = element.hashmap_node_data();
184
185 if (node->bucket != NULL) {
186 roc_panic(
187 "hashmap:"
188 " attempt to insert an element which is already a member of %s hashmap",
189 contains(element) ? "this" : "another");
190 }
191
192 const hashsum_t hash = T::key_hash(element.key());
193
194 if (find_node_(hash, element.key())) {
195 roc_panic("hashmap: attempt to insert an element with duplicate key");
196 }
197
198 Bucket& bucket = select_bucket_(hash);
199
200 node->hash = hash;
201 insert_node_(bucket, node);
202
203 proceed_rehash_(true);
204
205 OwnershipPolicy<T>::acquire(element);
206 }
207
208 //! Remove element from hashmap.
209 //!
210 //! @remarks
211 //! - releases ownership of @p element
212 //!
213 //! @pre
214 //! @p element should be member of this hashmap.
215 //!
216 //! @note
217 //! - has O(1) complexity
218 //! - doesn't compute key hash
219 //! - doesn't make allocations or deallocations
220 //! - proceedes lazy rehashing
221 void remove(T& element) {
222 HashmapNode::HashmapNodeData* node = element.hashmap_node_data();
223
224 if (!contains(element)) {
225 roc_panic("hashmap:"
226 " attempt to remove an element which is not a member of %s hashmap",
227 node->bucket == NULL ? "any" : "this");
228 }
229
230 remove_node_(node);
231
232 proceed_rehash_(false);
233
234 OwnershipPolicy<T>::release(element);
235 }
236
237 //! Grow hashtable capacity.
238 //!
239 //! @remarks
240 //! Check if hash table is full (size is equal to capacity), and if so, increase
241 //! hash table capacity and initiate incremental rehashing. Rehashing will be
242 //! performed during subsequent insertions and removals.
243 //!
244 //! @returns
245 //! - true if no growth needed or growth succeeded
246 //! - false if allocation failed
247 //!
248 //! @note
249 //! - has O(1) complexity
250 //! - doesn't computes key hashes
251 //! - makes allocations and deallocations
252 //! - doesn't proceed lazy rehashing
253 bool grow() {
254 const size_t cap = buckets_capacity_(n_curr_buckets_);
255 roc_panic_if_not(size_ <= cap);
256
257 if (size_ == cap) {
258 size_t n_buckets = n_curr_buckets_;
259 do {
260 n_buckets = get_prime_larger_than_(n_buckets);
261 } while (size_ >= buckets_capacity_(n_buckets));
262
263 if (!realloc_buckets_(n_buckets)) {
264 return false;
265 }
266
267 const size_t new_cap = buckets_capacity_(n_curr_buckets_);
268 roc_panic_if_not(size_ < new_cap);
269 }
270
271 return true;
272 }
273
274private:
275 enum {
276 // rehash happens when n_elements >= n_buckets * LoafFactorNum / LoafFactorDen
277 LoafFactorNum = 75,
278 LoafFactorDen = 100,
279
280 // how much buckets are embeded directly into Hashmap object
281 NumEmbeddedBuckets =
282 (EmbeddedCapacity * LoafFactorDen + LoafFactorNum - 1) / LoafFactorNum
283 };
284
285 struct Bucket {
286 HashmapNode::HashmapNodeData* head;
287 };
288
289 bool realloc_buckets_(size_t n_buckets) {
290 roc_panic_if_not(n_buckets > 0);
291
292 roc_panic_if_not(rehash_pos_ == 0);
293 roc_panic_if_not(rehash_remain_nodes_ == 0);
294
295 Bucket* buckets;
296 if (n_buckets <= NumEmbeddedBuckets
297 && curr_buckets_ != (Bucket*)embedded_buckets_.memory()) {
298 buckets = (Bucket*)embedded_buckets_.memory();
299 } else {
300 buckets = (Bucket*)allocator_.allocate(n_buckets * sizeof(Bucket));
301 if (buckets == NULL) {
302 return false;
303 }
304 }
305
306 memset(buckets, 0, n_buckets * sizeof(Bucket));
307
308 if (prev_buckets_ && prev_buckets_ != (Bucket*)embedded_buckets_.memory()) {
309 allocator_.deallocate(prev_buckets_);
310 prev_buckets_ = NULL;
311 }
312
313 if (curr_buckets_) {
314 prev_buckets_ = curr_buckets_;
315 n_prev_buckets_ = n_curr_buckets_;
316
317 rehash_pos_ = 0;
318 rehash_remain_nodes_ = size_;
319 }
320
321 curr_buckets_ = buckets;
322 n_curr_buckets_ = n_buckets;
323
324 return true;
325 }
326
327 void dealloc_buckets_() {
328 if (curr_buckets_ && curr_buckets_ != (Bucket*)embedded_buckets_.memory()) {
329 allocator_.deallocate(curr_buckets_);
330 }
331
332 if (prev_buckets_ && prev_buckets_ != (Bucket*)embedded_buckets_.memory()) {
333 allocator_.deallocate(prev_buckets_);
334 }
335 }
336
337 bool member_of_bucket_array_(Bucket* buckets,
338 size_t n_buckets,
339 const HashmapNode::HashmapNodeData* node) const {
340 if (n_buckets == 0) {
341 return false;
342 }
343
344 Bucket* node_bucket = (Bucket*)node->bucket;
345
346 return node_bucket >= buckets && node_bucket < buckets + n_buckets;
347 }
348
349 void release_bucket_array_(Bucket* buckets, size_t n_buckets) {
350 if (n_buckets == 0) {
351 return;
352 }
353
354 for (size_t n = 0; n < n_buckets; n++) {
355 HashmapNode::HashmapNodeData* node = buckets[n].head;
356
357 while (node) {
358 T* elem = static_cast<T*>(node->container_of());
359
360 node->bucket = NULL;
361
362 node = node->next;
363 if (node == buckets[n].head) {
364 node = NULL;
365 }
366
367 OwnershipPolicy<T>::release(*elem);
368 }
369 }
370 }
371
372 template <class Key> T* find_node_(hashsum_t hash, const Key& key) const {
373 if (n_curr_buckets_ != 0) {
374 T* elem = find_in_bucket_(curr_buckets_[hash % n_curr_buckets_], hash, key);
375 if (elem) {
376 return elem;
377 }
378 }
379
380 if (n_prev_buckets_ != 0) {
381 T* elem = find_in_bucket_(prev_buckets_[hash % n_prev_buckets_], hash, key);
382 if (elem) {
383 return elem;
384 }
385 }
386
387 return NULL;
388 }
389
390 template <class Key>
391 T* find_in_bucket_(const Bucket& bucket, hashsum_t hash, const Key& key) const {
392 HashmapNode::HashmapNodeData* node = bucket.head;
393
394 if (node != NULL) {
395 do {
396 if (node->hash == hash) {
397 T* elem = static_cast<T*>(node->container_of());
398
399 if (T::key_equal(elem->key(), key)) {
400 return elem;
401 }
402 }
403
404 node = node->next;
405 } while (node != bucket.head);
406 }
407
408 return NULL;
409 }
410
411 size_t buckets_capacity_(size_t n_buckets) const {
412 return n_buckets * LoafFactorNum / LoafFactorDen;
413 }
414
415 Bucket& select_bucket_(hashsum_t hash) const {
416 roc_panic_if(n_curr_buckets_ == 0);
417
418 return curr_buckets_[hash % n_curr_buckets_];
419 }
420
421 void insert_node_(Bucket& bucket, HashmapNode::HashmapNodeData* node) {
422 if (HashmapNode::HashmapNodeData* head = bucket.head) {
423 node->next = head;
424 node->prev = head->prev;
425
426 head->prev->next = node;
427 head->prev = node;
428 } else {
429 bucket.head = node;
430
431 node->next = node;
432 node->prev = node;
433 }
434
435 node->bucket = (void*)&bucket;
436
437 size_++;
438 }
439
440 void remove_node_(HashmapNode::HashmapNodeData* node) {
441 Bucket& bucket = *(Bucket*)node->bucket;
442
443 if (bucket.head == node) {
444 if (node->next != node) {
445 bucket.head = node->next;
446 } else {
447 bucket.head = NULL;
448 }
449 }
450
451 node->prev->next = node->next;
452 node->next->prev = node->prev;
453
454 if (member_of_bucket_array_(prev_buckets_, n_prev_buckets_, node)) {
455 roc_panic_if_not(rehash_remain_nodes_ > 0);
456 rehash_remain_nodes_--;
457 }
458
459 node->bucket = NULL;
460
461 size_--;
462 }
463
464 void proceed_rehash_(bool in_insert) {
465 if (rehash_remain_nodes_ == 0) {
466 return;
467 }
468
469 size_t num_migrations = 1;
470
471 if (in_insert) {
472 const size_t inserts_until_rehash =
473 buckets_capacity_(n_curr_buckets_) - size_;
474
475 if (inserts_until_rehash == 0) {
476 // migrate all remaining nodes
477 num_migrations = rehash_remain_nodes_;
478 } else {
479 // migrate as much nodes per insert as needed to finish until next rehash
480 num_migrations = (rehash_remain_nodes_ + inserts_until_rehash - 1)
481 / inserts_until_rehash;
482 }
483 }
484
485 for (;;) {
486 roc_panic_if_not(rehash_pos_ < n_prev_buckets_);
487
488 Bucket& bucket = prev_buckets_[rehash_pos_];
489
490 if (bucket.head == NULL) {
491 rehash_pos_++;
492
493 if (rehash_pos_ == n_prev_buckets_) {
494 roc_panic_if_not(rehash_remain_nodes_ == 0);
495
496 rehash_pos_ = 0;
497 n_prev_buckets_ = 0;
498
499 return;
500 }
501 continue;
502 }
503
504 if (num_migrations == 0) {
505 return;
506 }
507
508 migrate_node_(bucket.head);
509 --num_migrations;
510 }
511 }
512
513 void migrate_node_(HashmapNode::HashmapNodeData* node) {
514 remove_node_(node);
515
516 Bucket& bucket = select_bucket_(node->hash);
517
518 insert_node_(bucket, node);
519 }
520
521 size_t get_prime_larger_than_(size_t min) {
522 static const size_t primes[] = {
523 53, 97, 193, 389, 769, 1543, 3079,
524 6151, 12289, 24593, 49157, 98317, 196613, 393241,
525 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653,
526 100663319, 201326611, 402653189, 805306457, 1610612741
527 };
528
529 for (size_t n = 0; n < ROC_ARRAY_SIZE(primes); n++) {
530 if (primes[n] > min) {
531 return primes[n];
532 }
533 }
534
535 roc_panic_if(min * 3 < min);
536 return min * 3;
537 }
538
539 Bucket* curr_buckets_;
540 size_t n_curr_buckets_;
541
542 Bucket* prev_buckets_;
543 size_t n_prev_buckets_;
544
545 size_t size_;
546
547 size_t rehash_pos_;
548 size_t rehash_remain_nodes_;
549
550 IAllocator& allocator_;
551
552 AlignedStorage<NumEmbeddedBuckets * sizeof(Bucket)> embedded_buckets_;
553};
554
555} // namespace core
556} // namespace roc
557
558#endif // ROC_CORE_HASHMAP_H_
Aligned storage.
const void * memory() const
Get storage memory.
Intrusive hash table.
Definition: hashmap.h:78
size_t size() const
Get number of elements added to hashmap.
Definition: hashmap.h:117
~Hashmap()
Release ownership of all elements.
Definition: hashmap.h:103
void remove(T &element)
Remove element from hashmap.
Definition: hashmap.h:221
Hashmap(IAllocator &allocator)
Initialize empty hashmap.
Definition: hashmap.h:86
bool contains(const T &element) const
Check if element belongs to hashmap.
Definition: hashmap.h:126
void insert(T &element)
Insert element into hashmap.
Definition: hashmap.h:177
OwnershipPolicy< T >::Pointer Pointer
Pointer type.
Definition: hashmap.h:83
size_t capacity() const
Get maximum number of elements that can be added to hashmap before grow() should be called.
Definition: hashmap.h:112
Pointer find(const Key &key) const
Find element in the hashmap by key.
Definition: hashmap.h:151
bool grow()
Grow hashtable capacity.
Definition: hashmap.h:253
Memory allocator interface.
Definition: iallocator.h:23
virtual void deallocate(void *)=0
Deallocate previously allocated memory.
virtual void * allocate(size_t size)=0
Allocate memory.
Base class for non-copyable objects.
Definition: noncopyable.h:23
Hashmap node.
Hash sum.
Memory allocator interface.
Helper macros.
#define ROC_ARRAY_SIZE(a)
Get number of elements in a static array.
Definition: macro_helpers.h:24
size_t hashsum_t
Hash type.
Definition: hashsum.h:21
Root namespace.
Non-copyable object.
Ownership policies.
Panic.
#define roc_panic_if_not(x)
Panic if condition is false.
Definition: panic.h:37
#define roc_panic_if(x)
Panic if condition is true.
Definition: panic.h:26
#define roc_panic(...)
Print error message and terminate program gracefully.
Definition: panic.h:50
Commonly used types and functions.
void * bucket
The bucket this node belongs to.
Definition: hashmap_node.h:43
hashsum_t hash
Cached node hash.
Definition: hashmap_node.h:38