diff options
Diffstat (limited to 'src/bytecode/hashtable.h')
-rw-r--r-- | src/bytecode/hashtable.h | 208 |
1 files changed, 0 insertions, 208 deletions
diff --git a/src/bytecode/hashtable.h b/src/bytecode/hashtable.h deleted file mode 100644 index 7c0c380..0000000 --- a/src/bytecode/hashtable.h +++ /dev/null | |||
@@ -1,208 +0,0 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | #include "objects.h" | ||
6 | |||
7 | // Minimum table capacity. | ||
8 | #define HT_MIN_CAP 4 | ||
9 | #define HT_MIN_SHIFT 2 | ||
10 | |||
11 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
12 | #define HT_LOAD_THRESHOLD 0.8 | ||
13 | |||
14 | typedef struct HashTablePair { | ||
15 | Object *key; | ||
16 | Object *value; | ||
17 | } HashTablePair; | ||
18 | |||
19 | struct HashTable; | ||
20 | typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); | ||
21 | |||
22 | typedef struct HashTable { | ||
23 | // All available key-value pairs as a dynamic array. | ||
24 | HashTablePair *pairs; | ||
25 | |||
26 | // Internal storage. | ||
27 | Object *keys; | ||
28 | Object *values; | ||
29 | |||
30 | // Hash function. | ||
31 | HashFunction *hash_func; | ||
32 | |||
33 | // This table expects the number of buckets to grow in powers of two. To | ||
34 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
35 | // that power of 2: | ||
36 | // | ||
37 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
38 | // | ||
39 | uint8_t shift_amount; | ||
40 | } HashTable; | ||
41 | |||
42 | // Hash a byte stream using a circular shift + XOR hash function. | ||
43 | static inline uint64_t | ||
44 | _xor_shift_hash(const char *key, size_t n) { | ||
45 | uint64_t hash = 0x65d9d65f6a19574f; | ||
46 | char *last = (char *)key + n; | ||
47 | while (key != last) { | ||
48 | hash ^= (uint64_t)*key++; | ||
49 | hash = (hash << 8) | (hash >> (64 - 8)); | ||
50 | } | ||
51 | return hash; | ||
52 | } | ||
53 | |||
54 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
55 | static inline uint64_t | ||
56 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
57 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
58 | } | ||
59 | |||
60 | uint64_t | ||
61 | _symbol_hash(const HashTable *table, void *key) { | ||
62 | Object *obj = key; | ||
63 | uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text)); | ||
64 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
65 | return hash; | ||
66 | } | ||
67 | |||
68 | static inline float | ||
69 | ht_load_factor(const HashTable *table) { | ||
70 | return (float)array_size(table->pairs) / (float)array_cap(table->pairs); | ||
71 | } | ||
72 | |||
73 | HashTable * | ||
74 | ht_init(void) { | ||
75 | HashTable *table = malloc(sizeof(HashTable)); | ||
76 | *table = (HashTable){0}; | ||
77 | array_init(table->pairs, HT_MIN_CAP); | ||
78 | array_init(table->keys, HT_MIN_CAP); | ||
79 | array_init(table->values, HT_MIN_CAP); | ||
80 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
81 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
82 | } | ||
83 | table->shift_amount = HT_MIN_SHIFT; | ||
84 | table->hash_func = _symbol_hash; | ||
85 | return table; | ||
86 | } | ||
87 | |||
88 | void | ||
89 | _ht_insert(HashTable *table, Object key, Object value) { | ||
90 | size_t position = table->hash_func(table, &key); | ||
91 | size_t probe_position = position; | ||
92 | |||
93 | // Verify the key in that position is free. If not, use linear probing to | ||
94 | // find the next free slot. | ||
95 | HashTablePair *pairs = table->pairs; | ||
96 | bool update = false; | ||
97 | while (true) { | ||
98 | if (pairs[probe_position].key == NULL) { | ||
99 | array_head(pairs)->size++; | ||
100 | break; | ||
101 | } | ||
102 | if (object_equal(*pairs[probe_position].key, key)) { | ||
103 | update = true; | ||
104 | break; | ||
105 | } | ||
106 | if (probe_position == array_cap(pairs) - 1) { | ||
107 | probe_position = 0; | ||
108 | } else { | ||
109 | probe_position++; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (!update) { | ||
114 | array_push(table->keys, object_copy(key)); | ||
115 | array_push(table->values, value); | ||
116 | pairs[probe_position].key = &table->keys[array_size(table->keys) - 1]; | ||
117 | pairs[probe_position].value = &table->values[array_size(table->values) - 1]; | ||
118 | } else { | ||
119 | *pairs[probe_position].value = value; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | void | ||
124 | _ht_maybe_grow(HashTable *table) { | ||
125 | if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { | ||
126 | return; | ||
127 | } | ||
128 | |||
129 | // Create a new array with 2x capacity. | ||
130 | HashTablePair *old_pairs = table->pairs; | ||
131 | Object *old_keys = table->keys; | ||
132 | Object *old_values = table->values; | ||
133 | table->pairs = NULL; | ||
134 | table->keys = NULL; | ||
135 | table->values = NULL; | ||
136 | array_init(table->pairs, array_cap(old_pairs) * 2); | ||
137 | array_init(table->keys, array_cap(old_pairs) * 2); | ||
138 | array_init(table->values, array_cap(old_pairs) * 2); | ||
139 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
140 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
141 | } | ||
142 | table->shift_amount++; | ||
143 | |||
144 | // Hash everything in the table for the new array capacity. | ||
145 | for (size_t i = 0; i < array_cap(old_pairs); i++) { | ||
146 | if (old_pairs[i].key != NULL) { | ||
147 | _ht_insert(table, *old_pairs[i].key, *old_pairs[i].value); | ||
148 | } | ||
149 | } | ||
150 | |||
151 | // Free old arrays. | ||
152 | array_free(old_pairs); | ||
153 | for (size_t i = 0; i < array_size(old_keys); i++) { | ||
154 | object_free(&old_keys[i]); | ||
155 | } | ||
156 | array_free(old_keys); | ||
157 | array_free(old_values); | ||
158 | } | ||
159 | |||
160 | void | ||
161 | ht_insert(HashTable *table, Object key, Object value) { | ||
162 | _ht_maybe_grow(table); | ||
163 | _ht_insert(table, key, value); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | Object * | ||
168 | ht_lookup(const HashTable *table, Object key) { | ||
169 | size_t position = table->hash_func(table, &key); | ||
170 | size_t probe_position = position; | ||
171 | |||
172 | // Verify the key in that position is the same. If not perform linear | ||
173 | // probing to find it. | ||
174 | HashTablePair *pairs = table->pairs; | ||
175 | while (true) { | ||
176 | if (pairs[probe_position].key == NULL) { | ||
177 | return NULL; | ||
178 | } | ||
179 | if (object_equal(*pairs[probe_position].key, key)) { | ||
180 | break; | ||
181 | } | ||
182 | if (probe_position == array_cap(pairs) - 1) { | ||
183 | probe_position = 0; | ||
184 | } else { | ||
185 | probe_position++; | ||
186 | } | ||
187 | if (probe_position == position) { | ||
188 | return NULL; | ||
189 | } | ||
190 | } | ||
191 | return pairs[probe_position].value; | ||
192 | } | ||
193 | |||
194 | void | ||
195 | ht_free(HashTable *table) { | ||
196 | if (table == NULL) { | ||
197 | return; | ||
198 | } | ||
199 | array_free(table->pairs); | ||
200 | for (size_t i = 0; i < array_size(table->keys); i++) { | ||
201 | object_free(&table->keys[i]); | ||
202 | } | ||
203 | array_free(table->keys); | ||
204 | array_free(table->values); | ||
205 | free(table); | ||
206 | } | ||
207 | |||
208 | #endif // BDL_HASHTABLE_H | ||