aboutsummaryrefslogtreecommitdiffstats
path: root/src/treewalk/hashtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/treewalk/hashtable.h')
-rw-r--r--src/treewalk/hashtable.h191
1 files changed, 191 insertions, 0 deletions
diff --git a/src/treewalk/hashtable.h b/src/treewalk/hashtable.h
new file mode 100644
index 0000000..8f210e3
--- /dev/null
+++ b/src/treewalk/hashtable.h
@@ -0,0 +1,191 @@
1#ifndef BDL_HASHTABLE_H
2#define BDL_HASHTABLE_H
3
4#include "darray.h"
5#include "objects.h"
6
7// Minimum table capacity.
8#define HT_MIN_CAP 4
9#define HT_MIN_SHIFT 2
10
11// Adjust the load factor threshold at which the table will grow on insertion.
12#define HT_LOAD_THRESHOLD 0.8
13
14typedef struct HashTablePair {
15 Object *key;
16 Object *value;
17} HashTablePair;
18
19typedef struct HashTable {
20 // All available key-value pairs as a dynamic array.
21 HashTablePair *pairs;
22
23 // This table expects the number of buckets to grow in powers of two. To
24 // speedup the default hashing, we memoize the number of bits equivalent to
25 // that power of 2:
26 //
27 // cap := 1024 = 2 ^ 10, shift_amount := 10
28 //
29 uint8_t shift_amount;
30} HashTable;
31
32// Hash a byte stream using a circular shift + XOR hash function.
33static inline uint64_t
34_xor_shift_hash(const char *key, size_t n) {
35 uint64_t hash = 0x65d9d65f6a19574f;
36 char *last = (char *)key + n;
37 while (key != last) {
38 hash ^= (uint64_t)*key++;
39 hash = (hash << 8) | (hash >> (64 - 8));
40 }
41 return hash;
42}
43
44// Use Fibonacci hashing to map a hash to a value in range of the table.
45static inline uint64_t
46_fibonacci_hash(uint64_t hash, size_t shift_amount) {
47 return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount);
48}
49
50uint64_t
51ht_hash(const HashTable *table, const Object *key) {
52 uint64_t hash = 0;
53 switch (key->type) {
54 case OBJ_TYPE_FIXNUM: {
55 hash = key->fixnum;
56 } break;
57 case OBJ_TYPE_STRING: {
58 hash = _xor_shift_hash(key->string, array_size(key->string));
59 } break;
60 case OBJ_TYPE_SYMBOL: {
61 hash = _xor_shift_hash(key->symbol, array_size(key->symbol));
62 } break;
63 case OBJ_TYPE_BOOL:
64 case OBJ_TYPE_NIL:
65 case OBJ_TYPE_PAIR:
66 case OBJ_TYPE_LAMBDA:
67 case OBJ_TYPE_PROCEDURE:
68 case OBJ_TYPE_ERR: {
69 hash = (uintptr_t)key;
70 } break;
71 }
72 hash = _fibonacci_hash(hash, table->shift_amount);
73 return hash;
74}
75
76static inline float
77ht_load_factor(const HashTable *table) {
78 return (float)array_size(table->pairs) / (float)array_cap(table->pairs);
79}
80
81HashTable *
82ht_init(void) {
83 HashTable *table = malloc(sizeof(HashTable));
84 table->pairs = NULL;
85 array_init(table->pairs, HT_MIN_CAP);
86 for (size_t i = 0; i < array_cap(table->pairs); i++) {
87 table->pairs[i] = (HashTablePair){NULL, NULL};
88 }
89 table->shift_amount = HT_MIN_SHIFT;
90 return table;
91}
92
93void
94_ht_insert(HashTable *table, const Object *key, const Object *value) {
95 size_t position = ht_hash(table, key);
96 size_t probe_position = position;
97
98 // Verify the key in that position is free. If not, use linear probing to
99 // find the next free slot.
100 HashTablePair *pairs = table->pairs;
101 while (true) {
102 if (pairs[probe_position].key == NULL) {
103 array_head(pairs)->size++;
104 break;
105 }
106 if (obj_eq(pairs[probe_position].key, key)) {
107 break;
108 }
109 if (probe_position == array_cap(pairs) - 1) {
110 probe_position = 0;
111 } else {
112 probe_position++;
113 }
114 }
115 pairs[probe_position].key = (Object *)key;
116 pairs[probe_position].value = (Object *)value;
117}
118
119void
120_ht_maybe_grow(HashTable *table) {
121 HashTablePair *pairs = table->pairs;
122 if (ht_load_factor(table) < HT_LOAD_THRESHOLD) {
123 return;
124 }
125
126 // Create a new array with 2x capacity.
127 table->pairs = NULL;
128 array_init(table->pairs, array_cap(pairs) * 2);
129 for (size_t i = 0; i < array_cap(table->pairs); i++) {
130 table->pairs[i] = (HashTablePair){NULL, NULL};
131 }
132 table->shift_amount++;
133
134 // Hash everything in the table for the new array capacity.
135 for (size_t i = 0; i < array_cap(pairs); i++) {
136 if (pairs[i].key != NULL) {
137 _ht_insert(table, pairs[i].key, pairs[i].value);
138 }
139 }
140
141 // Free the old array.
142 array_free(pairs);
143}
144
145void
146ht_insert(HashTable *table, const Object *key, const Object *value) {
147 _ht_maybe_grow(table);
148 _ht_insert(table, key, value);
149 return;
150}
151
152Object *
153ht_lookup(const HashTable *table, const Object *key) {
154 size_t position = ht_hash(table, key);
155 size_t probe_position = position;
156
157 // Verify the key in that position is the same. If not perform linear
158 // probing to find it.
159 HashTablePair *pairs = table->pairs;
160 while (true) {
161 if (pairs[probe_position].key == NULL) {
162 return NULL;
163 }
164 if (obj_eq(pairs[probe_position].key, key)) {
165 break;
166 }
167 if (probe_position == array_cap(pairs) - 1) {
168 probe_position = 0;
169 } else {
170 probe_position++;
171 }
172 if (probe_position == position) {
173 return NULL;
174 }
175 }
176 return pairs[probe_position].value;
177}
178
179void
180ht_free(HashTable *table) {
181 if (table == NULL) {
182 return;
183 }
184 if (table->pairs == NULL) {
185 return;
186 }
187 array_free(table->pairs);
188 free(table);
189}
190
191#endif // BDL_HASHTABLE_H