diff options
author | Bad Diode <bd@badd10de.dev> | 2021-10-30 10:13:11 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-10-30 10:13:11 +0200 |
commit | af3f2ebc42bf9dc64fc581b9f8f79e98895cb417 (patch) | |
tree | 5065fe2cc5ac04a7dcd2558f97e01222721d0664 /src/hashtable.h | |
parent | f9a6691243d59915dad8785a321ca021bb27de27 (diff) | |
download | bdl-af3f2ebc42bf9dc64fc581b9f8f79e98895cb417.tar.gz bdl-af3f2ebc42bf9dc64fc581b9f8f79e98895cb417.zip |
Add hashtable for Environment tracking
Diffstat (limited to 'src/hashtable.h')
-rw-r--r-- | src/hashtable.h | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..faa8591 --- /dev/null +++ b/src/hashtable.h | |||
@@ -0,0 +1,180 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | |||
6 | // Minimum table capacity. | ||
7 | #define HT_MIN_CAP 4 | ||
8 | #define HT_MIN_SHIFT 2 | ||
9 | |||
10 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
11 | #define HT_LOAD_THRESHOLD 0.8 | ||
12 | |||
13 | typedef struct HashTablePair { | ||
14 | void *key; | ||
15 | void *value; | ||
16 | } HashTablePair; | ||
17 | |||
18 | struct HashTable; | ||
19 | typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); | ||
20 | typedef bool (EqFunc)(void *a, void *b); | ||
21 | |||
22 | typedef struct HashTable { | ||
23 | // All available key-value pairs as a dynamic array. | ||
24 | HashTablePair *pairs; | ||
25 | |||
26 | // Hash function. | ||
27 | HashFunction *hash_func; | ||
28 | |||
29 | // Equality function. | ||
30 | EqFunc *eq_func; | ||
31 | |||
32 | // This table expects the number of buckets to grow in powers of two. To | ||
33 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
34 | // that power of 2: | ||
35 | // | ||
36 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
37 | // | ||
38 | uint8_t shift_amount; | ||
39 | } HashTable; | ||
40 | |||
41 | // Hash a byte stream using a circular shift + XOR hash function. | ||
42 | static inline uint64_t | ||
43 | _xor_shift_hash(const char *key, size_t n) { | ||
44 | uint64_t hash = 0x65d9d65f6a19574f; | ||
45 | char *last = (char *)key + n; | ||
46 | while (key != last) { | ||
47 | hash ^= (uint64_t)*key++; | ||
48 | hash = (hash << 8) | (hash >> (64 - 8)); | ||
49 | } | ||
50 | return hash; | ||
51 | } | ||
52 | |||
53 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
54 | static inline uint64_t | ||
55 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
56 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
57 | } | ||
58 | |||
59 | static inline float | ||
60 | ht_load_factor(const HashTable *table) { | ||
61 | return (float)array_size(table->pairs) / (float)array_cap(table->pairs); | ||
62 | } | ||
63 | |||
64 | HashTable * | ||
65 | ht_init(HashFunction *hash_func, EqFunc *eq_func) { | ||
66 | HashTable *table = malloc(sizeof(HashTable)); | ||
67 | *table = (HashTable){0}; | ||
68 | array_init(table->pairs, HT_MIN_CAP); | ||
69 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
70 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
71 | } | ||
72 | table->shift_amount = HT_MIN_SHIFT; | ||
73 | table->hash_func = hash_func; | ||
74 | table->eq_func = eq_func; | ||
75 | return table; | ||
76 | } | ||
77 | |||
78 | void | ||
79 | _ht_insert(HashTable *table, void *key, void *value) { | ||
80 | size_t position = table->hash_func(table, key); | ||
81 | size_t probe_position = position; | ||
82 | |||
83 | // Verify the key in that position is free. If not, use linear probing to | ||
84 | // find the next free slot. | ||
85 | HashTablePair *pairs = table->pairs; | ||
86 | bool update = false; | ||
87 | while (true) { | ||
88 | if (pairs[probe_position].key == NULL) { | ||
89 | array_head(pairs)->size++; | ||
90 | break; | ||
91 | } | ||
92 | if (table->eq_func(pairs[probe_position].key, key)) { | ||
93 | update = true; | ||
94 | break; | ||
95 | } | ||
96 | if (probe_position == array_cap(pairs) - 1) { | ||
97 | probe_position = 0; | ||
98 | } else { | ||
99 | probe_position++; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | if (!update) { | ||
104 | pairs[probe_position].key = key; | ||
105 | pairs[probe_position].value = value; | ||
106 | } else { | ||
107 | pairs[probe_position].value = value; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | void | ||
112 | _ht_maybe_grow(HashTable *table) { | ||
113 | if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { | ||
114 | return; | ||
115 | } | ||
116 | |||
117 | // Create a new array with 2x capacity. | ||
118 | HashTablePair *old_pairs = table->pairs; | ||
119 | table->pairs = NULL; | ||
120 | array_init(table->pairs, array_cap(old_pairs) * 2); | ||
121 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
122 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
123 | } | ||
124 | table->shift_amount++; | ||
125 | |||
126 | // Hash everything in the table for the new array capacity. | ||
127 | for (size_t i = 0; i < array_cap(old_pairs); i++) { | ||
128 | if (old_pairs[i].key != NULL) { | ||
129 | _ht_insert(table, old_pairs[i].key, old_pairs[i].value); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | // Free old arrays. | ||
134 | array_free(old_pairs); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | ht_insert(HashTable *table, void *key, void *value) { | ||
139 | _ht_maybe_grow(table); | ||
140 | _ht_insert(table, key, value); | ||
141 | return; | ||
142 | } | ||
143 | |||
144 | void * | ||
145 | ht_lookup(const HashTable *table, void *key) { | ||
146 | size_t position = table->hash_func(table, key); | ||
147 | size_t probe_position = position; | ||
148 | |||
149 | // Verify the key in that position is the same. If not perform linear | ||
150 | // probing to find it. | ||
151 | HashTablePair *pairs = table->pairs; | ||
152 | while (true) { | ||
153 | if (pairs[probe_position].key == NULL) { | ||
154 | return NULL; | ||
155 | } | ||
156 | if (table->eq_func(pairs[probe_position].key, key)) { | ||
157 | break; | ||
158 | } | ||
159 | if (probe_position == array_cap(pairs) - 1) { | ||
160 | probe_position = 0; | ||
161 | } else { | ||
162 | probe_position++; | ||
163 | } | ||
164 | if (probe_position == position) { | ||
165 | return NULL; | ||
166 | } | ||
167 | } | ||
168 | return pairs[probe_position].value; | ||
169 | } | ||
170 | |||
171 | void | ||
172 | ht_free(HashTable *table) { | ||
173 | if (table == NULL) { | ||
174 | return; | ||
175 | } | ||
176 | array_free(table->pairs); | ||
177 | free(table); | ||
178 | } | ||
179 | |||
180 | #endif // BDL_HASHTABLE_H | ||