diff options
-rwxr-xr-x | src/bytecode/compiler.h | 10 | ||||
-rw-r--r-- | src/bytecode/hashtable.h | 215 |
2 files changed, 223 insertions, 2 deletions
diff --git a/src/bytecode/compiler.h b/src/bytecode/compiler.h index d404a5a..ec51942 100755 --- a/src/bytecode/compiler.h +++ b/src/bytecode/compiler.h | |||
@@ -33,11 +33,16 @@ has_next_token(const Visitor *visitor) { | |||
33 | 33 | ||
34 | void | 34 | void |
35 | emit_constant(Chunk *chunk, Token tok, Object obj) { | 35 | emit_constant(Chunk *chunk, Token tok, Object obj) { |
36 | // TODO: Should we deduplicate constants? For example why store a number | 36 | size_t prev_size = array_size(chunk->constants); |
37 | // more than once instead of reusing the existing index? | ||
38 | size_t num_idx = add_constant(chunk, obj); | 37 | size_t num_idx = add_constant(chunk, obj); |
39 | add_code(chunk, OP_CONSTANT, tok.line, tok.column); | 38 | add_code(chunk, OP_CONSTANT, tok.line, tok.column); |
40 | add_code(chunk, num_idx, tok.line, tok.column); | 39 | add_code(chunk, num_idx, tok.line, tok.column); |
40 | |||
41 | // If the non value constant was already present we need to properly free | ||
42 | // the memory from the object given to this function. | ||
43 | if (prev_size == array_size(chunk->constants)) { | ||
44 | object_free(obj); | ||
45 | } | ||
41 | } | 46 | } |
42 | 47 | ||
43 | void | 48 | void |
@@ -180,6 +185,7 @@ parse_list(Chunk *chunk, Visitor *vs, Token start) { | |||
180 | .line = start.line, | 185 | .line = start.line, |
181 | .col = start.column, | 186 | .col = start.column, |
182 | }); | 187 | }); |
188 | return; | ||
183 | } | 189 | } |
184 | Token tok = next_token(vs); | 190 | Token tok = next_token(vs); |
185 | switch (tok.type) { | 191 | switch (tok.type) { |
diff --git a/src/bytecode/hashtable.h b/src/bytecode/hashtable.h new file mode 100644 index 0000000..48665d3 --- /dev/null +++ b/src/bytecode/hashtable.h | |||
@@ -0,0 +1,215 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | #include "objects.h" | ||
6 | |||
7 | // Minimum table capacity. | ||
8 | #define HT_MIN_CAP 4 | ||
9 | #define HT_MIN_SHIFT 2 | ||
10 | |||
11 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
12 | #define HT_LOAD_THRESHOLD 0.8 | ||
13 | |||
14 | typedef struct HashTablePair { | ||
15 | Object *key; | ||
16 | Object *value; | ||
17 | } HashTablePair; | ||
18 | |||
19 | struct HashTable; | ||
20 | typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); | ||
21 | |||
22 | typedef struct HashTable { | ||
23 | // All available key-value pairs as a dynamic array. | ||
24 | HashTablePair *pairs; | ||
25 | |||
26 | // Internal storage. | ||
27 | Object *keys; | ||
28 | Object *values; | ||
29 | |||
30 | // Hash function. | ||
31 | HashFunction *hash_func; | ||
32 | |||
33 | // This table expects the number of buckets to grow in powers of two. To | ||
34 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
35 | // that power of 2: | ||
36 | // | ||
37 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
38 | // | ||
39 | uint8_t shift_amount; | ||
40 | } HashTable; | ||
41 | |||
42 | // Hash a byte stream using a circular shift + XOR hash function. | ||
43 | static inline uint64_t | ||
44 | _xor_shift_hash(const char *key, size_t n) { | ||
45 | uint64_t hash = 0x65d9d65f6a19574f; | ||
46 | char *last = (char *)key + n; | ||
47 | while (key != last) { | ||
48 | hash ^= (uint64_t)*key++; | ||
49 | hash = (hash << 8) | (hash >> (64 - 8)); | ||
50 | } | ||
51 | return hash; | ||
52 | } | ||
53 | |||
54 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
55 | static inline uint64_t | ||
56 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
57 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
58 | } | ||
59 | |||
60 | uint64_t | ||
61 | _symbol_hash(const HashTable *table, void *key) { | ||
62 | Object *obj = key; | ||
63 | uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text)); | ||
64 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
65 | return hash; | ||
66 | } | ||
67 | |||
68 | static inline float | ||
69 | ht_load_factor(const HashTable *table) { | ||
70 | return (float)array_size(table->pairs) / (float)array_cap(table->pairs); | ||
71 | } | ||
72 | |||
73 | HashTable * | ||
74 | ht_init(void) { | ||
75 | HashTable *table = malloc(sizeof(HashTable)); | ||
76 | *table = (HashTable){0}; | ||
77 | array_init(table->pairs, HT_MIN_CAP); | ||
78 | array_init(table->keys, HT_MIN_CAP); | ||
79 | array_init(table->values, HT_MIN_CAP); | ||
80 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
81 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
82 | } | ||
83 | table->shift_amount = HT_MIN_SHIFT; | ||
84 | table->hash_func = _symbol_hash; | ||
85 | return table; | ||
86 | } | ||
87 | |||
88 | void | ||
89 | _ht_insert(HashTable *table, Object key, Object value) { | ||
90 | size_t position = table->hash_func(table, &key); | ||
91 | size_t probe_position = position; | ||
92 | |||
93 | // Verify the key in that position is free. If not, use linear probing to | ||
94 | // find the next free slot. | ||
95 | HashTablePair *pairs = table->pairs; | ||
96 | bool update = false; | ||
97 | while (true) { | ||
98 | if (pairs[probe_position].key == NULL) { | ||
99 | array_head(pairs)->size++; | ||
100 | break; | ||
101 | } | ||
102 | if (object_equal(*pairs[probe_position].key, key)) { | ||
103 | update = true; | ||
104 | break; | ||
105 | } | ||
106 | if (probe_position == array_cap(pairs) - 1) { | ||
107 | probe_position = 0; | ||
108 | } else { | ||
109 | probe_position++; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (!update) { | ||
114 | array_push(table->keys, object_copy(key)); | ||
115 | array_push(table->values, object_copy(value)); | ||
116 | pairs[probe_position].key = &table->keys[array_size(table->keys) - 1]; | ||
117 | pairs[probe_position].value = &table->values[array_size(table->values) - 1]; | ||
118 | } else { | ||
119 | object_free(*pairs[probe_position].value); | ||
120 | *pairs[probe_position].value = value; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | void | ||
125 | _ht_maybe_grow(HashTable *table) { | ||
126 | if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { | ||
127 | return; | ||
128 | } | ||
129 | |||
130 | // Create a new array with 2x capacity. | ||
131 | HashTablePair *old_pairs = table->pairs; | ||
132 | Object *old_keys = table->keys; | ||
133 | Object *old_values = table->values; | ||
134 | table->pairs = NULL; | ||
135 | table->keys = NULL; | ||
136 | table->values = NULL; | ||
137 | array_init(table->pairs, array_cap(old_pairs) * 2); | ||
138 | array_init(table->keys, array_cap(old_pairs) * 2); | ||
139 | array_init(table->values, array_cap(old_pairs) * 2); | ||
140 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
141 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
142 | } | ||
143 | table->shift_amount++; | ||
144 | |||
145 | // Hash everything in the table for the new array capacity. | ||
146 | for (size_t i = 0; i < array_cap(old_pairs); i++) { | ||
147 | if (old_pairs[i].key != NULL) { | ||
148 | _ht_insert(table, *old_pairs[i].key, *old_pairs[i].value); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | // Free old arrays. | ||
153 | array_free(old_pairs); | ||
154 | for (size_t i = 0; i < array_size(old_keys); i++) { | ||
155 | Object key = old_keys[i]; | ||
156 | Object value = old_values[i]; | ||
157 | object_free(key); | ||
158 | object_free(value); | ||
159 | } | ||
160 | array_free(old_keys); | ||
161 | array_free(old_values); | ||
162 | } | ||
163 | |||
164 | void | ||
165 | ht_insert(HashTable *table, Object key, Object value) { | ||
166 | _ht_maybe_grow(table); | ||
167 | _ht_insert(table, key, value); | ||
168 | return; | ||
169 | } | ||
170 | |||
171 | Object * | ||
172 | ht_lookup(const HashTable *table, Object key) { | ||
173 | size_t position = table->hash_func(table, &key); | ||
174 | size_t probe_position = position; | ||
175 | |||
176 | // Verify the key in that position is the same. If not perform linear | ||
177 | // probing to find it. | ||
178 | HashTablePair *pairs = table->pairs; | ||
179 | while (true) { | ||
180 | if (pairs[probe_position].key == NULL) { | ||
181 | return NULL; | ||
182 | } | ||
183 | if (object_equal(*pairs[probe_position].key, key)) { | ||
184 | break; | ||
185 | } | ||
186 | if (probe_position == array_cap(pairs) - 1) { | ||
187 | probe_position = 0; | ||
188 | } else { | ||
189 | probe_position++; | ||
190 | } | ||
191 | if (probe_position == position) { | ||
192 | return NULL; | ||
193 | } | ||
194 | } | ||
195 | return pairs[probe_position].value; | ||
196 | } | ||
197 | |||
198 | void | ||
199 | ht_free(HashTable *table) { | ||
200 | if (table == NULL) { | ||
201 | return; | ||
202 | } | ||
203 | array_free(table->pairs); | ||
204 | for (size_t i = 0; i < array_size(table->keys); i++) { | ||
205 | Object key = table->keys[i]; | ||
206 | Object value = table->values[i]; | ||
207 | object_free(key); | ||
208 | object_free(value); | ||
209 | } | ||
210 | array_free(table->keys); | ||
211 | array_free(table->values); | ||
212 | free(table); | ||
213 | } | ||
214 | |||
215 | #endif // BDL_HASHTABLE_H | ||