diff options
author | Bad Diode <bd@badd10de.dev> | 2021-10-30 10:13:11 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-10-30 10:13:11 +0200 |
commit | af3f2ebc42bf9dc64fc581b9f8f79e98895cb417 (patch) | |
tree | 5065fe2cc5ac04a7dcd2558f97e01222721d0664 | |
parent | f9a6691243d59915dad8785a321ca021bb27de27 (diff) | |
download | bdl-af3f2ebc42bf9dc64fc581b9f8f79e98895cb417.tar.gz bdl-af3f2ebc42bf9dc64fc581b9f8f79e98895cb417.zip |
Add hashtable for Environment tracking
-rw-r--r-- | src/hashtable.h | 180 | ||||
-rw-r--r-- | src/parser.c | 53 | ||||
-rwxr-xr-x | src/parser.h | 7 |
3 files changed, 240 insertions, 0 deletions
diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..faa8591 --- /dev/null +++ b/src/hashtable.h | |||
@@ -0,0 +1,180 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | |||
6 | // Minimum table capacity. | ||
7 | #define HT_MIN_CAP 4 | ||
8 | #define HT_MIN_SHIFT 2 | ||
9 | |||
10 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
11 | #define HT_LOAD_THRESHOLD 0.8 | ||
12 | |||
13 | typedef struct HashTablePair { | ||
14 | void *key; | ||
15 | void *value; | ||
16 | } HashTablePair; | ||
17 | |||
18 | struct HashTable; | ||
19 | typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); | ||
20 | typedef bool (EqFunc)(void *a, void *b); | ||
21 | |||
22 | typedef struct HashTable { | ||
23 | // All available key-value pairs as a dynamic array. | ||
24 | HashTablePair *pairs; | ||
25 | |||
26 | // Hash function. | ||
27 | HashFunction *hash_func; | ||
28 | |||
29 | // Equality function. | ||
30 | EqFunc *eq_func; | ||
31 | |||
32 | // This table expects the number of buckets to grow in powers of two. To | ||
33 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
34 | // that power of 2: | ||
35 | // | ||
36 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
37 | // | ||
38 | uint8_t shift_amount; | ||
39 | } HashTable; | ||
40 | |||
41 | // Hash a byte stream using a circular shift + XOR hash function. | ||
42 | static inline uint64_t | ||
43 | _xor_shift_hash(const char *key, size_t n) { | ||
44 | uint64_t hash = 0x65d9d65f6a19574f; | ||
45 | char *last = (char *)key + n; | ||
46 | while (key != last) { | ||
47 | hash ^= (uint64_t)*key++; | ||
48 | hash = (hash << 8) | (hash >> (64 - 8)); | ||
49 | } | ||
50 | return hash; | ||
51 | } | ||
52 | |||
53 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
54 | static inline uint64_t | ||
55 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
56 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
57 | } | ||
58 | |||
59 | static inline float | ||
60 | ht_load_factor(const HashTable *table) { | ||
61 | return (float)array_size(table->pairs) / (float)array_cap(table->pairs); | ||
62 | } | ||
63 | |||
64 | HashTable * | ||
65 | ht_init(HashFunction *hash_func, EqFunc *eq_func) { | ||
66 | HashTable *table = malloc(sizeof(HashTable)); | ||
67 | *table = (HashTable){0}; | ||
68 | array_init(table->pairs, HT_MIN_CAP); | ||
69 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
70 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
71 | } | ||
72 | table->shift_amount = HT_MIN_SHIFT; | ||
73 | table->hash_func = hash_func; | ||
74 | table->eq_func = eq_func; | ||
75 | return table; | ||
76 | } | ||
77 | |||
78 | void | ||
79 | _ht_insert(HashTable *table, void *key, void *value) { | ||
80 | size_t position = table->hash_func(table, key); | ||
81 | size_t probe_position = position; | ||
82 | |||
83 | // Verify the key in that position is free. If not, use linear probing to | ||
84 | // find the next free slot. | ||
85 | HashTablePair *pairs = table->pairs; | ||
86 | bool update = false; | ||
87 | while (true) { | ||
88 | if (pairs[probe_position].key == NULL) { | ||
89 | array_head(pairs)->size++; | ||
90 | break; | ||
91 | } | ||
92 | if (table->eq_func(pairs[probe_position].key, key)) { | ||
93 | update = true; | ||
94 | break; | ||
95 | } | ||
96 | if (probe_position == array_cap(pairs) - 1) { | ||
97 | probe_position = 0; | ||
98 | } else { | ||
99 | probe_position++; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | if (!update) { | ||
104 | pairs[probe_position].key = key; | ||
105 | pairs[probe_position].value = value; | ||
106 | } else { | ||
107 | pairs[probe_position].value = value; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | void | ||
112 | _ht_maybe_grow(HashTable *table) { | ||
113 | if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { | ||
114 | return; | ||
115 | } | ||
116 | |||
117 | // Create a new array with 2x capacity. | ||
118 | HashTablePair *old_pairs = table->pairs; | ||
119 | table->pairs = NULL; | ||
120 | array_init(table->pairs, array_cap(old_pairs) * 2); | ||
121 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
122 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
123 | } | ||
124 | table->shift_amount++; | ||
125 | |||
126 | // Hash everything in the table for the new array capacity. | ||
127 | for (size_t i = 0; i < array_cap(old_pairs); i++) { | ||
128 | if (old_pairs[i].key != NULL) { | ||
129 | _ht_insert(table, old_pairs[i].key, old_pairs[i].value); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | // Free old arrays. | ||
134 | array_free(old_pairs); | ||
135 | } | ||
136 | |||
137 | void | ||
138 | ht_insert(HashTable *table, void *key, void *value) { | ||
139 | _ht_maybe_grow(table); | ||
140 | _ht_insert(table, key, value); | ||
141 | return; | ||
142 | } | ||
143 | |||
144 | void * | ||
145 | ht_lookup(const HashTable *table, void *key) { | ||
146 | size_t position = table->hash_func(table, key); | ||
147 | size_t probe_position = position; | ||
148 | |||
149 | // Verify the key in that position is the same. If not perform linear | ||
150 | // probing to find it. | ||
151 | HashTablePair *pairs = table->pairs; | ||
152 | while (true) { | ||
153 | if (pairs[probe_position].key == NULL) { | ||
154 | return NULL; | ||
155 | } | ||
156 | if (table->eq_func(pairs[probe_position].key, key)) { | ||
157 | break; | ||
158 | } | ||
159 | if (probe_position == array_cap(pairs) - 1) { | ||
160 | probe_position = 0; | ||
161 | } else { | ||
162 | probe_position++; | ||
163 | } | ||
164 | if (probe_position == position) { | ||
165 | return NULL; | ||
166 | } | ||
167 | } | ||
168 | return pairs[probe_position].value; | ||
169 | } | ||
170 | |||
171 | void | ||
172 | ht_free(HashTable *table) { | ||
173 | if (table == NULL) { | ||
174 | return; | ||
175 | } | ||
176 | array_free(table->pairs); | ||
177 | free(table); | ||
178 | } | ||
179 | |||
180 | #endif // BDL_HASHTABLE_H | ||
diff --git a/src/parser.c b/src/parser.c index 2215fad..e16fc4a 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -4,6 +4,15 @@ | |||
4 | static Object **objects = NULL; | 4 | static Object **objects = NULL; |
5 | static Root *roots = NULL; | 5 | static Root *roots = NULL; |
6 | 6 | ||
7 | |||
8 | uint64_t | ||
9 | symbol_hash(const HashTable *table, void *key) { | ||
10 | Object *obj = key; | ||
11 | uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text)); | ||
12 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
13 | return hash; | ||
14 | } | ||
15 | |||
7 | Token | 16 | Token |
8 | peek_token(const Parser *parser) { | 17 | peek_token(const Parser *parser) { |
9 | return parser->tokens[parser->current]; | 18 | return parser->tokens[parser->current]; |
@@ -391,6 +400,16 @@ parse(Token *tokens, Errors *errors) { | |||
391 | array_push(roots, root); | 400 | array_push(roots, root); |
392 | } | 401 | } |
393 | 402 | ||
403 | // TEST INSERTION/LOOKUP. | ||
404 | HashTable *table = ht_init(symbol_hash, object_equal); | ||
405 | ht_insert(table, &roots[0][0], &roots[1][0]); | ||
406 | ht_insert(table, &roots[2][0], &roots[3][0]); | ||
407 | Object *val = ht_lookup(table, &roots[2][0]); | ||
408 | object_display(val); | ||
409 | val = ht_lookup(table, &roots[0][0]); | ||
410 | object_display(val); | ||
411 | ht_free(table); | ||
412 | |||
394 | // Perform semantic analysis. | 413 | // Perform semantic analysis. |
395 | // TODO: Check that symbols are defined before usage. | 414 | // TODO: Check that symbols are defined before usage. |
396 | // TODO: Remove unnecessary statements. | 415 | // TODO: Remove unnecessary statements. |
@@ -522,3 +541,37 @@ object_display(Object *obj) { | |||
522 | } | 541 | } |
523 | return; | 542 | return; |
524 | } | 543 | } |
544 | |||
545 | bool | ||
546 | object_equal(Object *a, Object *b) { | ||
547 | if (a == NULL || b == NULL || a->type != b->type) { | ||
548 | return false; | ||
549 | } | ||
550 | switch (a->type) { | ||
551 | case OBJ_TYPE_TRUE: | ||
552 | case OBJ_TYPE_FALSE: { | ||
553 | return true; | ||
554 | } break; | ||
555 | case OBJ_TYPE_FIXNUM: { | ||
556 | return a->fixnum == b->fixnum; | ||
557 | } break; | ||
558 | case OBJ_TYPE_SYMBOL: | ||
559 | case OBJ_TYPE_STRING: { | ||
560 | if (array_size(a->text) != array_size(b->text)) { | ||
561 | return false; | ||
562 | } | ||
563 | for (size_t i = 0; i < array_size(a->text); i++) { | ||
564 | if (a->text[i] != b->text[i]) { | ||
565 | return false; | ||
566 | } | ||
567 | } | ||
568 | } break; | ||
569 | case OBJ_TYPE_LAMBDA: { | ||
570 | // return a->closure == b.closure; | ||
571 | } break; | ||
572 | default: { | ||
573 | return false; | ||
574 | } break; | ||
575 | } | ||
576 | return true; | ||
577 | } | ||
diff --git a/src/parser.h b/src/parser.h index 14d9df5..d1eddc7 100755 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define BDL_PARSER_H | 2 | #define BDL_PARSER_H |
3 | 3 | ||
4 | #include "lexer.h" | 4 | #include "lexer.h" |
5 | #include "hashtable.h" | ||
5 | 6 | ||
6 | typedef enum ObjectType { | 7 | typedef enum ObjectType { |
7 | OBJ_TYPE_NIL, | 8 | OBJ_TYPE_NIL, |
@@ -58,6 +59,11 @@ typedef struct Object { | |||
58 | size_t col; | 59 | size_t col; |
59 | } Object; | 60 | } Object; |
60 | 61 | ||
62 | typedef struct Environment { | ||
63 | HashTable *table; | ||
64 | struct Environment *parent; | ||
65 | }; | ||
66 | |||
61 | typedef struct Parser { | 67 | typedef struct Parser { |
62 | Token *tokens; | 68 | Token *tokens; |
63 | size_t current; | 69 | size_t current; |
@@ -86,6 +92,7 @@ Root * parse(Token *tokens, Errors *errors); | |||
86 | 92 | ||
87 | // Object operations. | 93 | // Object operations. |
88 | void object_display(Object *obj); | 94 | void object_display(Object *obj); |
95 | bool object_equal(Object *a, Object *b); | ||
89 | 96 | ||
90 | // Manage resources. | 97 | // Manage resources. |
91 | Object * object_alloc(Token tok, ObjectType type); | 98 | Object * object_alloc(Token tok, ObjectType type); |