diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-21 11:40:31 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-21 11:40:31 +0200 |
commit | 4646adb64119d9bd761447024a2f35cc0c9c2115 (patch) | |
tree | 116dfd0f2ad27933639188e6d29eec42305124d7 /src/main.c | |
parent | c10fe9d48d40e3fa2a20ee61b79518dfbaeb4db9 (diff) | |
download | bdl-4646adb64119d9bd761447024a2f35cc0c9c2115.tar.gz bdl-4646adb64119d9bd761447024a2f35cc0c9c2115.zip |
Add a basic symbol checker
Diffstat (limited to 'src/main.c')
-rw-r--r-- | src/main.c | 284 |
1 files changed, 271 insertions, 13 deletions
@@ -22,21 +22,248 @@ init(void) { | |||
22 | log_init_default(); | 22 | log_init_default(); |
23 | } | 23 | } |
24 | 24 | ||
25 | typedef enum { | ||
26 | SYM_VAR, | ||
27 | SYM_FUN, | ||
28 | SYM_PARAM, | ||
29 | SYM_BUILTIN, | ||
30 | } SymbolKind; | ||
31 | |||
32 | typedef struct Symbol { | ||
33 | SymbolKind kind; | ||
34 | } Symbol; | ||
35 | |||
36 | MAPDEF(SymbolMap, symmap, Str, Symbol, str_hash, str_eq) | ||
37 | |||
38 | typedef struct Scope { | ||
39 | sz id; | ||
40 | sz depth; | ||
41 | SymbolMap *symbols; | ||
42 | struct Scope *parent; | ||
43 | } Scope; | ||
44 | |||
45 | typedef struct Analyzer { | ||
46 | Arena *storage; | ||
47 | Str file_name; | ||
48 | sz scope_gen; | ||
49 | Scope **scopes; | ||
50 | } Analyzer; | ||
51 | |||
52 | Scope * | ||
53 | scope_alloc(Analyzer *a, Scope *parent) { | ||
54 | bool is_root = parent == NULL; | ||
55 | Scope *scope = arena_malloc(sizeof(Scope), a->storage); | ||
56 | scope->id = a->scope_gen++; | ||
57 | scope->parent = parent; | ||
58 | scope->depth = is_root ? 0 : parent->depth + 1; | ||
59 | array_push(a->scopes, scope, a->storage); | ||
60 | return scope; | ||
61 | } | ||
62 | |||
63 | SymbolMap * | ||
64 | find_symbol(Scope *scope, Str symbol) { | ||
65 | while (scope != NULL) { | ||
66 | SymbolMap *val = symmap_lookup(&scope->symbols, symbol); | ||
67 | if (val != NULL) { | ||
68 | return val; | ||
69 | } | ||
70 | scope = scope->parent; | ||
71 | } | ||
72 | return NULL; | ||
73 | } | ||
74 | |||
25 | void | 75 | void |
26 | process_file(Str path) { | 76 | analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { |
27 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 77 | assert(a); |
78 | assert(scope); | ||
79 | if (!node) { | ||
80 | return; | ||
81 | } | ||
82 | assert(node); | ||
83 | switch (node->kind) { | ||
84 | case NODE_FUN: { | ||
85 | if (scope->depth != 0) { | ||
86 | Str symbol = node->func_name->value.str; | ||
87 | if (symmap_lookup(&scope->symbols, symbol) != NULL) { | ||
88 | eprintln( | ||
89 | "%s:%d:%d: error: symbol '%s' already exists in " | ||
90 | "current " | ||
91 | "scope", | ||
92 | a->file_name, node->func_name->line, | ||
93 | node->func_name->col, symbol); | ||
94 | } | ||
95 | symmap_insert(&scope->symbols, symbol, | ||
96 | (Symbol){.kind = SYM_FUN}, a->storage); | ||
97 | } | ||
98 | |||
99 | scope = scope_alloc(a, scope); | ||
100 | for (sz i = 0; i < array_size(node->func_params); i++) { | ||
101 | Node *param = node->func_params[i]; | ||
102 | symmap_insert(&scope->symbols, param->param_name->value.str, | ||
103 | (Symbol){.kind = SYM_PARAM}, a->storage); | ||
104 | } | ||
105 | analyzer_symbols(a, node->func_body, scope); | ||
106 | } break; | ||
107 | case NODE_COND: | ||
108 | case NODE_MATCH: { | ||
109 | if (node->match_expr) { | ||
110 | analyzer_symbols(a, node->match_expr, scope); | ||
111 | } | ||
112 | for (sz i = 0; i < array_size(node->match_cases); i++) { | ||
113 | Node *expr = node->match_cases[i]; | ||
114 | analyzer_symbols(a, expr, scope); | ||
115 | } | ||
116 | } break; | ||
117 | case NODE_RETURN: | ||
118 | // TODO: Struct field initializers need to be checked... when we | ||
119 | // get to that. | ||
120 | // case NODE_VAL_FIELD: | ||
121 | // case NODE_ENUM: | ||
122 | // case NODE_STRUCT: | ||
123 | // case NODE_STRUCT_LIT: | ||
124 | case NODE_BLOCK: { | ||
125 | Scope *next = scope_alloc(a, scope); | ||
126 | for (sz i = 0; i < array_size(node->elements); i++) { | ||
127 | Node *expr = node->elements[i]; | ||
128 | analyzer_symbols(a, expr, next); | ||
129 | } | ||
130 | } break; | ||
131 | case NODE_CASE: { | ||
132 | analyzer_symbols(a, node->case_value, scope); | ||
133 | if (node->case_expr) { | ||
134 | Scope *next = scope_alloc(a, scope); | ||
135 | analyzer_symbols(a, node->case_expr, next); | ||
136 | } | ||
137 | } break; | ||
138 | case NODE_IF: { | ||
139 | analyzer_symbols(a, node->cond_if, scope); | ||
140 | if (node->cond_expr) { | ||
141 | Scope *next = scope_alloc(a, scope); | ||
142 | analyzer_symbols(a, node->cond_expr, next); | ||
143 | } | ||
144 | if (node->cond_else) { | ||
145 | Scope *next = scope_alloc(a, scope); | ||
146 | analyzer_symbols(a, node->cond_else, next); | ||
147 | } | ||
148 | } break; | ||
149 | case NODE_WHILE: { | ||
150 | analyzer_symbols(a, node->while_cond, scope); | ||
151 | scope = scope_alloc(a, scope); | ||
152 | analyzer_symbols(a, node->while_expr, scope); | ||
153 | } break; | ||
154 | case NODE_FUNCALL: { | ||
155 | Str symbol = node->value.str; | ||
156 | if (find_symbol(scope, symbol) == NULL) { | ||
157 | eprintln( | ||
158 | "%s:%d:%d: error: symbol '%s' doesn't exists in current " | ||
159 | " scope ", | ||
160 | a->file_name, node->line, node->col, symbol); | ||
161 | } | ||
162 | Scope *next = scope_alloc(a, scope); | ||
163 | for (sz i = 0; i < array_size(node->elements); i++) { | ||
164 | Node *expr = node->elements[i]; | ||
165 | analyzer_symbols(a, expr, next); | ||
166 | } | ||
167 | } break; | ||
168 | case NODE_SYMBOL_IDX: | ||
169 | case NODE_SYMBOL: { | ||
170 | Str symbol = node->value.str; | ||
171 | if (find_symbol(scope, symbol) == NULL) { | ||
172 | eprintln( | ||
173 | "%s:%d:%d: error: symbol '%s' doesn't exists in current " | ||
174 | " scope ", | ||
175 | a->file_name, node->line, node->col, symbol); | ||
176 | } | ||
177 | // TODO: Resolve symbol chains. | ||
178 | } break; | ||
179 | case NODE_SET: { | ||
180 | analyzer_symbols(a, node->var_name, scope); | ||
181 | analyzer_symbols(a, node->var_val, scope); | ||
182 | } break; | ||
183 | case NODE_LET: { | ||
184 | // Check the value first to avoid recursive symbol usage. | ||
185 | analyzer_symbols(a, node->var_val, scope); | ||
186 | |||
187 | Str symbol = node->var_name->value.str; | ||
188 | if (symmap_lookup(&scope->symbols, symbol) != NULL) { | ||
189 | eprintln( | ||
190 | "%s:%d:%d: error: symbol '%s' already exists in current " | ||
191 | " scope ", | ||
192 | a->file_name, node->var_name->line, node->var_name->col, | ||
193 | symbol); | ||
194 | } | ||
195 | symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_VAR}, | ||
196 | a->storage); | ||
197 | } break; | ||
198 | // Binary ops | ||
199 | case NODE_ADD: | ||
200 | case NODE_SUB: | ||
201 | case NODE_DIV: | ||
202 | case NODE_MUL: | ||
203 | case NODE_MOD: | ||
204 | case NODE_NOT: | ||
205 | case NODE_AND: | ||
206 | case NODE_OR: | ||
207 | case NODE_EQ: | ||
208 | case NODE_NEQ: | ||
209 | case NODE_LT: | ||
210 | case NODE_GT: | ||
211 | case NODE_LE: | ||
212 | case NODE_GE: | ||
213 | case NODE_BITNOT: | ||
214 | case NODE_BITAND: | ||
215 | case NODE_BITOR: | ||
216 | case NODE_BITLSHIFT: | ||
217 | case NODE_BITRSHIFT: { | ||
218 | analyzer_symbols(a, node->left, scope); | ||
219 | analyzer_symbols(a, node->right, scope); | ||
220 | } break; | ||
221 | default: break; | ||
222 | } | ||
223 | } | ||
224 | |||
225 | void | ||
226 | symbolic_analysis(Analyzer *a, Parser *parser) { | ||
227 | Scope *scope = scope_alloc(a, NULL); | ||
228 | |||
229 | // Fill builtin functions. | ||
230 | Str builtin_functions[] = { | ||
231 | cstr("print"), | ||
232 | cstr("println"), | ||
233 | }; | ||
234 | for (sz i = 0; i < LEN(builtin_functions); i++) { | ||
235 | Str symbol = builtin_functions[i]; | ||
236 | symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_BUILTIN}, | ||
237 | a->storage); | ||
238 | } | ||
239 | |||
240 | // Find top level function declarations. | ||
241 | for (sz i = 0; i < array_size(parser->nodes); i++) { | ||
242 | Node *root = parser->nodes[i]; | ||
243 | if (root->kind == NODE_FUN) { | ||
244 | Str symbol = root->func_name->value.str; | ||
245 | if (symmap_lookup(&scope->symbols, symbol) != NULL) { | ||
246 | eprintln( | ||
247 | "%s:%d:%d: error: symbol '%s' already exists in current " | ||
248 | "scope", | ||
249 | a->file_name, root->func_name->line, root->func_name->col, | ||
250 | symbol); | ||
251 | } | ||
252 | symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_FUN}, | ||
253 | a->storage); | ||
254 | } | ||
255 | } | ||
28 | 256 | ||
29 | StrIntMap *map = NULL; | 257 | // Recursively fill symbol tables. |
30 | strintmap_insert(&map, cstr("test"), 1, &lexer_arena); | 258 | for (sz i = 0; i < array_size(parser->nodes); i++) { |
31 | strintmap_insert(&map, cstr("toast"), 9, &lexer_arena); | 259 | Node *root = parser->nodes[i]; |
32 | strintmap_insert(&map, cstr("waaaa"), 420, &lexer_arena); | 260 | analyzer_symbols(a, root, scope); |
33 | strintmap_insert(&map, cstr("test"), 69, &lexer_arena); | ||
34 | StrIntMapIter iter = strintmap_iterator(map, &lexer_arena); | ||
35 | StrIntMap *val = strintmap_next(&iter, &lexer_arena); | ||
36 | while (val) { | ||
37 | println("KEY: %s value: %d", val->key, val->val); | ||
38 | val = strintmap_next(&iter, &lexer_arena); | ||
39 | } | 261 | } |
262 | } | ||
263 | |||
264 | void | ||
265 | process_file(Str path) { | ||
266 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | ||
40 | 267 | ||
41 | FileContents file = platform_read_file(path, &lexer_arena); | 268 | FileContents file = platform_read_file(path, &lexer_arena); |
42 | if (file.err) { | 269 | if (file.err) { |
@@ -94,7 +321,34 @@ process_file(Str path) { | |||
94 | graph_ast(parser.nodes); | 321 | graph_ast(parser.nodes); |
95 | goto stop; | 322 | goto stop; |
96 | } | 323 | } |
97 | // TODO: Semantic analysis. | 324 | |
325 | // Semantic analysis. | ||
326 | Analyzer analyzer = (Analyzer){ | ||
327 | .storage = &lexer_arena, | ||
328 | .file_name = path, | ||
329 | }; | ||
330 | symbolic_analysis(&analyzer, &parser); | ||
331 | |||
332 | // DEBUG: Printing all symbols on the HT. | ||
333 | // StrSetIter iter = strset_iterator(root_scope.symbols, &lexer_arena); | ||
334 | // StrSet *val = strset_next(&iter, &lexer_arena); | ||
335 | // while (val) { | ||
336 | // println("SYMBOL: %s", val->key); | ||
337 | // val = strset_next(&iter, &lexer_arena); | ||
338 | // } | ||
339 | |||
340 | // StrIntMap *map = NULL; | ||
341 | // strintmap_insert(&map, cstr("test"), 1, &lexer_arena); | ||
342 | // strintmap_insert(&map, cstr("toast"), 9, &lexer_arena); | ||
343 | // strintmap_insert(&map, cstr("waaaa"), 420, &lexer_arena); | ||
344 | // strintmap_insert(&map, cstr("test"), 69, &lexer_arena); | ||
345 | // StrIntMapIter iter = strintmap_iterator(map, &lexer_arena); | ||
346 | // StrIntMap *val = strintmap_next(&iter, &lexer_arena); | ||
347 | // while (val) { | ||
348 | // println("KEY: %s value: %d", val->key, val->val); | ||
349 | // val = strintmap_next(&iter, &lexer_arena); | ||
350 | // } | ||
351 | |||
98 | // TODO: Type checking. | 352 | // TODO: Type checking. |
99 | 353 | ||
100 | // Compile roots. | 354 | // Compile roots. |
@@ -136,6 +390,10 @@ process_file(Str path) { | |||
136 | // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); | 390 | // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); |
137 | // disassemble_chunk(chunk); | 391 | // disassemble_chunk(chunk); |
138 | 392 | ||
393 | #if DEBUG == 1 | ||
394 | println("Space used: %{Arena}", &lexer_arena); | ||
395 | #endif | ||
396 | |||
139 | // arena_destroy(&bytecode_arena, os_allocator); | 397 | // arena_destroy(&bytecode_arena, os_allocator); |
140 | stop: | 398 | stop: |
141 | // Free up resources. | 399 | // Free up resources. |