author    Bad Diode <bd@badd10de.dev>    2021-10-24 11:52:56 +0200
committer Bad Diode <bd@badd10de.dev>    2021-10-24 11:58:43 +0200
commit    6e27b20d10306d53cd838ef375fe80571dfe91ff (patch)
tree      3ec4bbc6862e8b50e9e365b35a4e27d9c90d4bbc /src/bytecode
parent    b743e03fc6042e3e2d55cfa0387c092824de64c5 (diff)
Add updated hash table with interned key-values
Diffstat (limited to 'src/bytecode')
-rwxr-xr-x  src/bytecode/compiler.h    10
-rw-r--r--  src/bytecode/hashtable.h  215
2 files changed, 223 insertions, 2 deletions
diff --git a/src/bytecode/compiler.h b/src/bytecode/compiler.h
index d404a5a..ec51942 100755
--- a/src/bytecode/compiler.h
+++ b/src/bytecode/compiler.h
@@ -33,11 +33,16 @@ has_next_token(const Visitor *visitor) {
 
 void
 emit_constant(Chunk *chunk, Token tok, Object obj) {
-    // TODO: Should we deduplicate constants? For example why store a number
-    // more than once instead of reusing the existing index?
+    size_t prev_size = array_size(chunk->constants);
     size_t num_idx = add_constant(chunk, obj);
     add_code(chunk, OP_CONSTANT, tok.line, tok.column);
     add_code(chunk, num_idx, tok.line, tok.column);
+
+    // If the non-value constant was already present, we need to properly free
+    // the memory of the object given to this function.
+    if (prev_size == array_size(chunk->constants)) {
+        object_free(obj);
+    }
 }
 
 void
@@ -180,6 +185,7 @@ parse_list(Chunk *chunk, Visitor *vs, Token start) {
             .line = start.line,
             .col = start.column,
         });
+        return;
     }
     Token tok = next_token(vs);
     switch (tok.type) {
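The emit_constant change above relies on add_constant deduplicating entries: when the constant pool does not grow, the Object handed to emit_constant was not stored, so it is freed to avoid a leak. add_constant itself is not part of this diff; a minimal sketch of a deduplicating version, assuming the darray helpers and object_equal used elsewhere in this commit (the real implementation may instead intern through the new hash table below), could look like this:

// Hypothetical sketch, not the code from this commit: return the index of an
// equal constant if one is already in the pool, otherwise append the object
// and return its new index.
size_t
add_constant(Chunk *chunk, Object obj) {
    for (size_t i = 0; i < array_size(chunk->constants); i++) {
        if (object_equal(chunk->constants[i], obj)) {
            return i;
        }
    }
    array_push(chunk->constants, obj);
    return array_size(chunk->constants) - 1;
}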
diff --git a/src/bytecode/hashtable.h b/src/bytecode/hashtable.h
new file mode 100644
index 0000000..48665d3
--- /dev/null
+++ b/src/bytecode/hashtable.h
@@ -0,0 +1,215 @@
#ifndef BDL_HASHTABLE_H
#define BDL_HASHTABLE_H

#include "darray.h"
#include "objects.h"

// Minimum table capacity.
#define HT_MIN_CAP 4
#define HT_MIN_SHIFT 2

// Load factor threshold at which the table grows on insertion.
#define HT_LOAD_THRESHOLD 0.8

typedef struct HashTablePair {
    Object *key;
    Object *value;
} HashTablePair;

struct HashTable;
typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes);

typedef struct HashTable {
    // All available key-value pairs as a dynamic array.
    HashTablePair *pairs;

    // Internal storage.
    Object *keys;
    Object *values;

    // Hash function.
    HashFunction *hash_func;

    // This table expects the number of buckets to grow in powers of two. To
    // speed up the default hashing, we memoize the number of bits equivalent
    // to that power of 2:
    //
    //     cap := 1024 = 2 ^ 10, shift_amount := 10
    //
    uint8_t shift_amount;
} HashTable;

// Hash a byte stream using a circular shift + XOR hash function.
static inline uint64_t
_xor_shift_hash(const char *key, size_t n) {
    uint64_t hash = 0x65d9d65f6a19574f;
    char *last = (char *)key + n;
    while (key != last) {
        hash ^= (uint64_t)*key++;
        hash = (hash << 8) | (hash >> (64 - 8));
    }
    return hash;
}

// Use Fibonacci hashing to map a hash to a value in the range of the table.
static inline uint64_t
_fibonacci_hash(uint64_t hash, size_t shift_amount) {
    return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount);
}
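// For example, with 16 buckets the table stores shift_amount = 4: multiplying
// by 11400714819323198485 (2^64 divided by the golden ratio, rounded down)
// scrambles the hash, and the shift by 64 - 4 keeps only the top 4 bits, so
// any 64-bit hash maps to a bucket index in [0, 15].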

uint64_t
_symbol_hash(const HashTable *table, void *key) {
    Object *obj = key;
    uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text));
    hash = _fibonacci_hash(hash, table->shift_amount);
    return hash;
}

static inline float
ht_load_factor(const HashTable *table) {
    return (float)array_size(table->pairs) / (float)array_cap(table->pairs);
}
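// For example, 13 occupied slots in a 16-bucket table gives a load factor of
// 0.8125. That is above HT_LOAD_THRESHOLD, so the next call to ht_insert()
// below will grow the table before inserting.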

HashTable *
ht_init(void) {
    HashTable *table = malloc(sizeof(HashTable));
    *table = (HashTable){0};
    array_init(table->pairs, HT_MIN_CAP);
    array_init(table->keys, HT_MIN_CAP);
    array_init(table->values, HT_MIN_CAP);
    for (size_t i = 0; i < array_cap(table->pairs); i++) {
        table->pairs[i] = (HashTablePair){NULL, NULL};
    }
    table->shift_amount = HT_MIN_SHIFT;
    table->hash_func = _symbol_hash;
    return table;
}

void
_ht_insert(HashTable *table, Object key, Object value) {
    size_t position = table->hash_func(table, &key);
    size_t probe_position = position;

    // Check whether the slot at that position is free. If not, use linear
    // probing to find the next free slot.
    HashTablePair *pairs = table->pairs;
    bool update = false;
    while (true) {
        if (pairs[probe_position].key == NULL) {
            array_head(pairs)->size++;
            break;
        }
        if (object_equal(*pairs[probe_position].key, key)) {
            update = true;
            break;
        }
        if (probe_position == array_cap(pairs) - 1) {
            probe_position = 0;
        } else {
            probe_position++;
        }
    }

    if (!update) {
        array_push(table->keys, object_copy(key));
        array_push(table->values, object_copy(value));
        pairs[probe_position].key = &table->keys[array_size(table->keys) - 1];
        pairs[probe_position].value = &table->values[array_size(table->values) - 1];
    } else {
        object_free(*pairs[probe_position].value);
        *pairs[probe_position].value = value;
    }
}
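// Probing example: if two distinct symbols hash to bucket 5, the first one
// takes pairs[5] and the second one lands in the next free slot (pairs[6],
// wrapping to pairs[0] past the end of the array). Re-inserting an existing
// key takes the update branch instead: the old value is freed and replaced,
// and the key itself stays interned exactly once.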

void
_ht_maybe_grow(HashTable *table) {
    if (ht_load_factor(table) < HT_LOAD_THRESHOLD) {
        return;
    }

    // Create a new array with 2x capacity.
    HashTablePair *old_pairs = table->pairs;
    Object *old_keys = table->keys;
    Object *old_values = table->values;
    table->pairs = NULL;
    table->keys = NULL;
    table->values = NULL;
    array_init(table->pairs, array_cap(old_pairs) * 2);
    array_init(table->keys, array_cap(old_pairs) * 2);
    array_init(table->values, array_cap(old_pairs) * 2);
    for (size_t i = 0; i < array_cap(table->pairs); i++) {
        table->pairs[i] = (HashTablePair){NULL, NULL};
    }
    table->shift_amount++;

    // Hash everything in the table for the new array capacity.
    for (size_t i = 0; i < array_cap(old_pairs); i++) {
        if (old_pairs[i].key != NULL) {
            _ht_insert(table, *old_pairs[i].key, *old_pairs[i].value);
        }
    }

    // Free old arrays.
    array_free(old_pairs);
    for (size_t i = 0; i < array_size(old_keys); i++) {
        Object key = old_keys[i];
        Object value = old_values[i];
        object_free(key);
        object_free(value);
    }
    array_free(old_keys);
    array_free(old_values);
}
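// Growth example: starting from HT_MIN_CAP = 4 with shift_amount = 2, each
// resize doubles the capacity and increments shift_amount (4 -> 8 -> 16,
// shift 2 -> 3 -> 4), preserving the cap == 2^shift_amount relation that
// _fibonacci_hash depends on.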

void
ht_insert(HashTable *table, Object key, Object value) {
    _ht_maybe_grow(table);
    _ht_insert(table, key, value);
    return;
}

Object *
ht_lookup(const HashTable *table, Object key) {
    size_t position = table->hash_func(table, &key);
    size_t probe_position = position;

    // Check whether the key at that position matches. If not, use linear
    // probing to find it.
    HashTablePair *pairs = table->pairs;
    while (true) {
        if (pairs[probe_position].key == NULL) {
            return NULL;
        }
        if (object_equal(*pairs[probe_position].key, key)) {
            break;
        }
        if (probe_position == array_cap(pairs) - 1) {
            probe_position = 0;
        } else {
            probe_position++;
        }
        if (probe_position == position) {
            return NULL;
        }
    }
    return pairs[probe_position].value;
}

void
ht_free(HashTable *table) {
    if (table == NULL) {
        return;
    }
    array_free(table->pairs);
    for (size_t i = 0; i < array_size(table->keys); i++) {
        Object key = table->keys[i];
        Object value = table->values[i];
        object_free(key);
        object_free(value);
    }
    array_free(table->keys);
    array_free(table->values);
    free(table);
}

#endif // BDL_HASHTABLE_H
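A minimal usage sketch of the new table follows. The Object constructors are not shown in this diff, so make_symbol and make_number below are hypothetical placeholders for whatever objects.h actually provides, and what the caller should do with its own key/value after insertion depends on those definitions:

#include "hashtable.h"

int
main(void) {
    HashTable *table = ht_init();

    // make_symbol/make_number are stand-ins for the real Object constructors.
    Object key = make_symbol("pi");
    Object value = make_number(3.14159);

    // For a new key the table stores copies of both key and value; inserting
    // the same key again frees the previously stored value and replaces it.
    ht_insert(table, key, value);

    // Returns a pointer to the interned value, or NULL if the key is absent.
    Object *found = ht_lookup(table, key);
    if (found != NULL) {
        // ... use *found ...
    }

    // Frees the pair index, the interned keys/values, and the table itself.
    ht_free(table);
    return 0;
}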