diff options
Diffstat (limited to 'src/bootstrap/hashtable.h')
-rw-r--r-- | src/bootstrap/hashtable.h | 191 |
1 files changed, 0 insertions, 191 deletions
diff --git a/src/bootstrap/hashtable.h b/src/bootstrap/hashtable.h deleted file mode 100644 index 8f210e3..0000000 --- a/src/bootstrap/hashtable.h +++ /dev/null | |||
@@ -1,191 +0,0 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | #include "objects.h" | ||
6 | |||
7 | // Minimum table capacity. | ||
8 | #define HT_MIN_CAP 4 | ||
9 | #define HT_MIN_SHIFT 2 | ||
10 | |||
11 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
12 | #define HT_LOAD_THRESHOLD 0.8 | ||
13 | |||
14 | typedef struct HashTablePair { | ||
15 | Object *key; | ||
16 | Object *value; | ||
17 | } HashTablePair; | ||
18 | |||
19 | typedef struct HashTable { | ||
20 | // All available key-value pairs as a dynamic array. | ||
21 | HashTablePair *pairs; | ||
22 | |||
23 | // This table expects the number of buckets to grow in powers of two. To | ||
24 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
25 | // that power of 2: | ||
26 | // | ||
27 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
28 | // | ||
29 | uint8_t shift_amount; | ||
30 | } HashTable; | ||
31 | |||
32 | // Hash a byte stream using a circular shift + XOR hash function. | ||
33 | static inline uint64_t | ||
34 | _xor_shift_hash(const char *key, size_t n) { | ||
35 | uint64_t hash = 0x65d9d65f6a19574f; | ||
36 | char *last = (char *)key + n; | ||
37 | while (key != last) { | ||
38 | hash ^= (uint64_t)*key++; | ||
39 | hash = (hash << 8) | (hash >> (64 - 8)); | ||
40 | } | ||
41 | return hash; | ||
42 | } | ||
43 | |||
44 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
45 | static inline uint64_t | ||
46 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
47 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
48 | } | ||
49 | |||
50 | uint64_t | ||
51 | ht_hash(const HashTable *table, const Object *key) { | ||
52 | uint64_t hash = 0; | ||
53 | switch (key->type) { | ||
54 | case OBJ_TYPE_FIXNUM: { | ||
55 | hash = key->fixnum; | ||
56 | } break; | ||
57 | case OBJ_TYPE_STRING: { | ||
58 | hash = _xor_shift_hash(key->string, array_size(key->string)); | ||
59 | } break; | ||
60 | case OBJ_TYPE_SYMBOL: { | ||
61 | hash = _xor_shift_hash(key->symbol, array_size(key->symbol)); | ||
62 | } break; | ||
63 | case OBJ_TYPE_BOOL: | ||
64 | case OBJ_TYPE_NIL: | ||
65 | case OBJ_TYPE_PAIR: | ||
66 | case OBJ_TYPE_LAMBDA: | ||
67 | case OBJ_TYPE_PROCEDURE: | ||
68 | case OBJ_TYPE_ERR: { | ||
69 | hash = (uintptr_t)key; | ||
70 | } break; | ||
71 | } | ||
72 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
73 | return hash; | ||
74 | } | ||
75 | |||
76 | static inline float | ||
77 | ht_load_factor(const HashTable *table) { | ||
78 | return (float)array_size(table->pairs) / (float)array_cap(table->pairs); | ||
79 | } | ||
80 | |||
81 | HashTable * | ||
82 | ht_init(void) { | ||
83 | HashTable *table = malloc(sizeof(HashTable)); | ||
84 | table->pairs = NULL; | ||
85 | array_init(table->pairs, HT_MIN_CAP); | ||
86 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
87 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
88 | } | ||
89 | table->shift_amount = HT_MIN_SHIFT; | ||
90 | return table; | ||
91 | } | ||
92 | |||
93 | void | ||
94 | _ht_insert(HashTable *table, const Object *key, const Object *value) { | ||
95 | size_t position = ht_hash(table, key); | ||
96 | size_t probe_position = position; | ||
97 | |||
98 | // Verify the key in that position is free. If not, use linear probing to | ||
99 | // find the next free slot. | ||
100 | HashTablePair *pairs = table->pairs; | ||
101 | while (true) { | ||
102 | if (pairs[probe_position].key == NULL) { | ||
103 | array_head(pairs)->size++; | ||
104 | break; | ||
105 | } | ||
106 | if (obj_eq(pairs[probe_position].key, key)) { | ||
107 | break; | ||
108 | } | ||
109 | if (probe_position == array_cap(pairs) - 1) { | ||
110 | probe_position = 0; | ||
111 | } else { | ||
112 | probe_position++; | ||
113 | } | ||
114 | } | ||
115 | pairs[probe_position].key = (Object *)key; | ||
116 | pairs[probe_position].value = (Object *)value; | ||
117 | } | ||
118 | |||
119 | void | ||
120 | _ht_maybe_grow(HashTable *table) { | ||
121 | HashTablePair *pairs = table->pairs; | ||
122 | if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { | ||
123 | return; | ||
124 | } | ||
125 | |||
126 | // Create a new array with 2x capacity. | ||
127 | table->pairs = NULL; | ||
128 | array_init(table->pairs, array_cap(pairs) * 2); | ||
129 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
130 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
131 | } | ||
132 | table->shift_amount++; | ||
133 | |||
134 | // Hash everything in the table for the new array capacity. | ||
135 | for (size_t i = 0; i < array_cap(pairs); i++) { | ||
136 | if (pairs[i].key != NULL) { | ||
137 | _ht_insert(table, pairs[i].key, pairs[i].value); | ||
138 | } | ||
139 | } | ||
140 | |||
141 | // Free the old array. | ||
142 | array_free(pairs); | ||
143 | } | ||
144 | |||
145 | void | ||
146 | ht_insert(HashTable *table, const Object *key, const Object *value) { | ||
147 | _ht_maybe_grow(table); | ||
148 | _ht_insert(table, key, value); | ||
149 | return; | ||
150 | } | ||
151 | |||
152 | Object * | ||
153 | ht_lookup(const HashTable *table, const Object *key) { | ||
154 | size_t position = ht_hash(table, key); | ||
155 | size_t probe_position = position; | ||
156 | |||
157 | // Verify the key in that position is the same. If not perform linear | ||
158 | // probing to find it. | ||
159 | HashTablePair *pairs = table->pairs; | ||
160 | while (true) { | ||
161 | if (pairs[probe_position].key == NULL) { | ||
162 | return NULL; | ||
163 | } | ||
164 | if (obj_eq(pairs[probe_position].key, key)) { | ||
165 | break; | ||
166 | } | ||
167 | if (probe_position == array_cap(pairs) - 1) { | ||
168 | probe_position = 0; | ||
169 | } else { | ||
170 | probe_position++; | ||
171 | } | ||
172 | if (probe_position == position) { | ||
173 | return NULL; | ||
174 | } | ||
175 | } | ||
176 | return pairs[probe_position].value; | ||
177 | } | ||
178 | |||
179 | void | ||
180 | ht_free(HashTable *table) { | ||
181 | if (table == NULL) { | ||
182 | return; | ||
183 | } | ||
184 | if (table->pairs == NULL) { | ||
185 | return; | ||
186 | } | ||
187 | array_free(table->pairs); | ||
188 | free(table); | ||
189 | } | ||
190 | |||
191 | #endif // BDL_HASHTABLE_H | ||