diff options
author | Bad Diode <bd@badd10de.dev> | 2022-04-18 16:27:21 -0300 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2022-04-18 16:27:21 -0300 |
commit | 3da041f2e17fdeb69bf345aadf89c5fcc1814260 (patch) | |
tree | c1979ffee13f45f757712a61304a3edba89a80f5 /src/semantic.c | |
parent | dcd3192e50d7b4ea333ecf57a7e8b325af145547 (diff) | |
download | bdl-3da041f2e17fdeb69bf345aadf89c5fcc1814260.tar.gz bdl-3da041f2e17fdeb69bf345aadf89c5fcc1814260.zip |
Move semantic analysis to separate file
Diffstat (limited to 'src/semantic.c')
-rw-r--r-- | src/semantic.c | 527 |
1 files changed, 527 insertions, 0 deletions
diff --git a/src/semantic.c b/src/semantic.c new file mode 100644 index 0000000..06958b9 --- /dev/null +++ b/src/semantic.c | |||
@@ -0,0 +1,527 @@ | |||
1 | #include "hashtable.h" | ||
2 | |||
3 | typedef struct Scope { | ||
4 | struct Scope *parent; | ||
5 | HashTable *symbols; | ||
6 | HashTable *types; | ||
7 | } Scope; | ||
8 | |||
9 | typedef struct ParseTree { | ||
10 | Root *roots; | ||
11 | Scope *global_scope; | ||
12 | Scope *current_scope; | ||
13 | } ParseTree; | ||
14 | |||
15 | typedef struct Type { | ||
16 | StringView name; | ||
17 | size_t size; // (bytes) | ||
18 | } Type; | ||
19 | |||
20 | typedef enum DefaultType { | ||
21 | TYPE_VOID, | ||
22 | TYPE_BOOL, | ||
23 | TYPE_STR, | ||
24 | TYPE_U8, | ||
25 | TYPE_U16, | ||
26 | TYPE_U32, | ||
27 | TYPE_U64, | ||
28 | TYPE_S8, | ||
29 | TYPE_S16, | ||
30 | TYPE_S32, | ||
31 | TYPE_S64, | ||
32 | TYPE_F32, | ||
33 | TYPE_F64, | ||
34 | } DefaultType; | ||
35 | |||
36 | static Type default_types[] = { | ||
37 | [TYPE_VOID] = {STRING("void"), 0}, | ||
38 | [TYPE_BOOL] = {STRING("bool"), 1}, | ||
39 | [TYPE_STR] = {STRING("str"), 16}, // size (8) + pointer to data (8). | ||
40 | [TYPE_U8] = {STRING("u8"), 1}, | ||
41 | [TYPE_U16] = {STRING("u16"), 2}, | ||
42 | [TYPE_U32] = {STRING("u32"), 4}, | ||
43 | [TYPE_U64] = {STRING("u64"), 8}, | ||
44 | [TYPE_S8] = {STRING("s8"), 1}, | ||
45 | [TYPE_S16] = {STRING("s16"), 2}, | ||
46 | [TYPE_S32] = {STRING("s32"), 4}, | ||
47 | [TYPE_S64] = {STRING("s64"), 8}, | ||
48 | [TYPE_F32] = {STRING("f32"), 4}, | ||
49 | [TYPE_F64] = {STRING("f64"), 8}, | ||
50 | }; | ||
51 | |||
52 | typedef enum SymbolType { | ||
53 | SYMBOL_VAR, | ||
54 | SYMBOL_FUN, | ||
55 | } SymbolType; | ||
56 | |||
57 | typedef struct Symbol { | ||
58 | Node *name; | ||
59 | SymbolType type; | ||
60 | |||
61 | union { | ||
62 | struct { | ||
63 | Node *type; | ||
64 | } var; | ||
65 | |||
66 | struct { | ||
67 | Node **param_types; | ||
68 | Node *return_type; | ||
69 | } fun; | ||
70 | }; | ||
71 | } Symbol; | ||
72 | |||
73 | |||
74 | Symbol * | ||
75 | alloc_symval(Node *name, SymbolType type) { | ||
76 | Symbol *val = malloc(sizeof(Symbol)); | ||
77 | val->name = name; | ||
78 | val->type = type; | ||
79 | return val; | ||
80 | } | ||
81 | |||
82 | u64 sym_hash(const struct HashTable *table, void *bytes) { | ||
83 | Node *symbol = bytes; | ||
84 | u64 hash = _xor_shift_hash(symbol->string.start, symbol->string.n); | ||
85 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
86 | return hash; | ||
87 | } | ||
88 | |||
89 | bool sym_eq(void *a, void *b) { | ||
90 | Node *a_node = a; | ||
91 | Node *b_node = b; | ||
92 | assert(a_node->type == NODE_SYMBOL); | ||
93 | assert(b_node->type == NODE_SYMBOL); | ||
94 | return sv_equal(&a_node->string, &b_node->string); | ||
95 | } | ||
96 | |||
97 | u64 type_hash(const struct HashTable *table, void *bytes) { | ||
98 | StringView *type = bytes; | ||
99 | u64 hash = _xor_shift_hash(type->start, type->n); | ||
100 | hash = _fibonacci_hash(hash, table->shift_amount); | ||
101 | return hash; | ||
102 | } | ||
103 | |||
104 | bool type_eq(void *a, void *b) { | ||
105 | StringView *a_type = a; | ||
106 | StringView *b_type = b; | ||
107 | return sv_equal(a_type, b_type); | ||
108 | } | ||
109 | |||
110 | Scope * | ||
111 | alloc_scope(Scope *parent) { | ||
112 | Scope *scope = malloc(sizeof(Scope)); | ||
113 | scope->parent = parent; | ||
114 | scope->symbols = ht_init(sym_hash, sym_eq); | ||
115 | scope->types = ht_init(type_hash, type_eq); | ||
116 | return scope; | ||
117 | } | ||
118 | |||
119 | Type * | ||
120 | find_type(Scope *scope, Node *type) { | ||
121 | // TODO: Normally default types will be used more often. Since we don't | ||
122 | // allow type shadowing, we should search first on the global scope. | ||
123 | while (scope != NULL) { | ||
124 | Type *ret = ht_lookup(scope->types, &type->string); | ||
125 | if (ret != NULL) { | ||
126 | return ret; | ||
127 | } | ||
128 | scope = scope->parent; | ||
129 | } | ||
130 | push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TYPE, type->line, type->col); | ||
131 | return NULL; | ||
132 | } | ||
133 | |||
134 | bool | ||
135 | insert_symbol(Scope *scope, Node *symbol, Symbol *val) { | ||
136 | // Check if symbol already exists. | ||
137 | HashTable *symbols = scope->symbols; | ||
138 | if (ht_lookup(symbols, symbol) != NULL) { | ||
139 | push_error(ERR_TYPE_PARSER, ERR_SYMBOL_REDEF, symbol->line, symbol->col); | ||
140 | return false; | ||
141 | } | ||
142 | ht_insert(symbols, symbol, val); | ||
143 | return true; | ||
144 | } | ||
145 | |||
146 | Type * | ||
147 | coerce_numeric_types(Type *a, Type *b) { | ||
148 | // TODO: Decide what to do with mixed numeric types. What are the promotion | ||
149 | // rules, etc. | ||
150 | if (a == &default_types[TYPE_U8]) { | ||
151 | if (b == &default_types[TYPE_U16] || | ||
152 | b == &default_types[TYPE_U32] || | ||
153 | b == &default_types[TYPE_U64]) { | ||
154 | return b; | ||
155 | } | ||
156 | } else if (a == &default_types[TYPE_U16]) { | ||
157 | if (b == &default_types[TYPE_U32] || | ||
158 | b == &default_types[TYPE_U64]) { | ||
159 | return b; | ||
160 | } | ||
161 | } else if (a == &default_types[TYPE_U32]) { | ||
162 | if (b == &default_types[TYPE_U64]) { | ||
163 | return b; | ||
164 | } | ||
165 | } else if (a == &default_types[TYPE_S8]) { | ||
166 | if (b == &default_types[TYPE_S16] || | ||
167 | b == &default_types[TYPE_S32] || | ||
168 | b == &default_types[TYPE_S64]) { | ||
169 | return b; | ||
170 | } | ||
171 | } else if (a == &default_types[TYPE_S16]) { | ||
172 | if (b == &default_types[TYPE_S32] || | ||
173 | b == &default_types[TYPE_S64]) { | ||
174 | return b; | ||
175 | } | ||
176 | } else if (a == &default_types[TYPE_S32]) { | ||
177 | if (b == &default_types[TYPE_S64]) { | ||
178 | return b; | ||
179 | } | ||
180 | } else if (a == &default_types[TYPE_F32]) { | ||
181 | if (b == &default_types[TYPE_F64]) { | ||
182 | return b; | ||
183 | } | ||
184 | } | ||
185 | return a; | ||
186 | } | ||
187 | |||
188 | bool | ||
189 | type_is_numeric(Type *t) { | ||
190 | if (t == &default_types[TYPE_U8] || | ||
191 | t == &default_types[TYPE_U16] || | ||
192 | t == &default_types[TYPE_U32] || | ||
193 | t == &default_types[TYPE_U64] || | ||
194 | t == &default_types[TYPE_S8] || | ||
195 | t == &default_types[TYPE_S16] || | ||
196 | t == &default_types[TYPE_S32] || | ||
197 | t == &default_types[TYPE_S64] || | ||
198 | t == &default_types[TYPE_F32] || | ||
199 | t == &default_types[TYPE_F64]) { | ||
200 | return true; | ||
201 | } | ||
202 | return false; | ||
203 | } | ||
204 | |||
205 | Symbol * | ||
206 | find_symbol(Scope *scope, Node *node) { | ||
207 | while (scope != NULL) { | ||
208 | Symbol *val = ht_lookup(scope->symbols, node); | ||
209 | if (val != NULL) { | ||
210 | return val; | ||
211 | } | ||
212 | scope = scope->parent; | ||
213 | } | ||
214 | push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_SYMBOL, node->line, node->col); | ||
215 | return NULL; | ||
216 | } | ||
217 | |||
218 | bool | ||
219 | resolve_type(Scope *scope, Node *node) { | ||
220 | if (node->expr_type != NULL) { | ||
221 | return true; | ||
222 | } | ||
223 | switch (node->type) { | ||
224 | case NODE_BUILTIN: { | ||
225 | for (size_t i = 0; i < array_size(node->builtin.args); ++i) { | ||
226 | Node *arg = node->builtin.args[i]; | ||
227 | if (!resolve_type(scope, arg)) { | ||
228 | return false; | ||
229 | } | ||
230 | } | ||
231 | switch (node->builtin.type) { | ||
232 | // Numbers. | ||
233 | case TOKEN_ADD: | ||
234 | case TOKEN_SUB: | ||
235 | case TOKEN_MUL: | ||
236 | case TOKEN_DIV: | ||
237 | case TOKEN_MOD: { | ||
238 | Type *type = NULL; | ||
239 | for (size_t i = 0; i < array_size(node->builtin.args); ++i) { | ||
240 | Node *arg = node->builtin.args[i]; | ||
241 | |||
242 | // Check that all arguments are numbers. | ||
243 | if (!type_is_numeric(arg->expr_type)) { | ||
244 | push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_NUM, | ||
245 | arg->line, arg->col); | ||
246 | return false; | ||
247 | } | ||
248 | |||
249 | if (type == NULL) { | ||
250 | type = arg->expr_type; | ||
251 | } else if (type != arg->expr_type) { | ||
252 | type = coerce_numeric_types(type, arg->expr_type); | ||
253 | } | ||
254 | } | ||
255 | node->expr_type = type; | ||
256 | } break; | ||
257 | // Bools. | ||
258 | case TOKEN_NOT: | ||
259 | case TOKEN_AND: | ||
260 | case TOKEN_OR: { | ||
261 | // Check that all arguments are boolean. | ||
262 | for (size_t i = 0; i < array_size(node->builtin.args); ++i) { | ||
263 | Node *arg = node->builtin.args[i]; | ||
264 | if (arg->expr_type != &default_types[TYPE_BOOL]) { | ||
265 | push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_BOOL, | ||
266 | arg->line, arg->col); | ||
267 | return false; | ||
268 | } | ||
269 | } | ||
270 | node->expr_type = &default_types[TYPE_BOOL]; | ||
271 | } break; | ||
272 | case TOKEN_EQ: | ||
273 | case TOKEN_LT: | ||
274 | case TOKEN_GT: | ||
275 | case TOKEN_LE: | ||
276 | case TOKEN_GE: { | ||
277 | // Check that all arguments are nums. | ||
278 | for (size_t i = 0; i < array_size(node->builtin.args); ++i) { | ||
279 | Node *arg = node->builtin.args[i]; | ||
280 | if (!type_is_numeric(arg->expr_type)) { | ||
281 | push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_NUM, | ||
282 | arg->line, arg->col); | ||
283 | return false; | ||
284 | } | ||
285 | } | ||
286 | node->expr_type = &default_types[TYPE_BOOL]; | ||
287 | } break; | ||
288 | default: break; | ||
289 | } | ||
290 | } break; | ||
291 | case NODE_SYMBOL: { | ||
292 | Symbol *val = find_symbol(scope, node); | ||
293 | if (val == NULL) { | ||
294 | return false; | ||
295 | } | ||
296 | |||
297 | Type *type = NULL; | ||
298 | switch (val->type) { | ||
299 | case SYMBOL_VAR: { | ||
300 | type = find_type(scope, val->var.type); | ||
301 | } break; | ||
302 | case SYMBOL_FUN: { | ||
303 | type = find_type(scope, val->fun.return_type); | ||
304 | } break; | ||
305 | } | ||
306 | if (type == NULL) { | ||
307 | return false; | ||
308 | } | ||
309 | node->expr_type = type; | ||
310 | } break; | ||
311 | case NODE_FUN: { | ||
312 | // Fill up new scope with parameters | ||
313 | scope = alloc_scope(scope); | ||
314 | |||
315 | // Parameters. | ||
316 | for (size_t i = 0; i < array_size(node->fun.param_names); ++i) { | ||
317 | Node *param = node->fun.param_names[i]; | ||
318 | Node *type = node->fun.param_types[i]; | ||
319 | Symbol *var = alloc_symval(param, SYMBOL_VAR); | ||
320 | var->var.type = type; | ||
321 | if (!insert_symbol(scope, param, var)) { | ||
322 | return false; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | // Body. | ||
327 | Node *body = node->fun.body; | ||
328 | if (body->type == NODE_BLOCK) { | ||
329 | for (size_t i = 0; i < array_size(body->block.expr); ++i) { | ||
330 | Node *expr = body->block.expr[i]; | ||
331 | if (!resolve_type(scope, expr)) { | ||
332 | return false; | ||
333 | } | ||
334 | } | ||
335 | Node *last_expr = body->block.expr[array_size(body->block.expr) - 1]; | ||
336 | node->expr_type = last_expr->expr_type; | ||
337 | } else { | ||
338 | if (!resolve_type(scope, body)) { | ||
339 | return false; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | // Check that the type of body matches the return type. | ||
344 | StringView *type_body = &node->fun.body->expr_type->name; | ||
345 | StringView *return_type = &node->fun.return_type->string; | ||
346 | if (!sv_equal(type_body, return_type)) { | ||
347 | push_error(ERR_TYPE_PARSER, ERR_WRONG_RET_TYPE, node->line, node->col); | ||
348 | return false; | ||
349 | } | ||
350 | } break; | ||
351 | case NODE_BLOCK: { | ||
352 | scope = alloc_scope(scope); | ||
353 | for (size_t i = 0; i < array_size(node->block.expr); ++i) { | ||
354 | Node *expr = node->block.expr[i]; | ||
355 | if (!resolve_type(scope, expr)) { | ||
356 | return false; | ||
357 | } | ||
358 | } | ||
359 | Node *last_expr = node->block.expr[array_size(node->block.expr) - 1]; | ||
360 | node->expr_type = last_expr->expr_type; | ||
361 | } break; | ||
362 | case NODE_IF: { | ||
363 | if (!resolve_type(scope, node->ifexpr.cond)) { | ||
364 | return false; | ||
365 | } | ||
366 | if (!resolve_type(scope, node->ifexpr.expr_true)) { | ||
367 | return false; | ||
368 | } | ||
369 | Type *type_true = node->ifexpr.expr_true->expr_type; | ||
370 | node->expr_type = type_true; | ||
371 | if (node->ifexpr.expr_false != NULL) { | ||
372 | if (!resolve_type(scope, node->ifexpr.expr_false)) { | ||
373 | return false; | ||
374 | } | ||
375 | } | ||
376 | |||
377 | // Check ifexpr.cond is a bool. | ||
378 | Type *type_cond = node->ifexpr.cond->expr_type; | ||
379 | if (!sv_equal(&type_cond->name, &default_types[TYPE_BOOL].name)) { | ||
380 | push_error(ERR_TYPE_PARSER, ERR_WRONG_COND_TYPE, | ||
381 | node->line, node->col); | ||
382 | return false; | ||
383 | } | ||
384 | |||
385 | // Check if types of expr_true and expr_false match | ||
386 | if (node->ifexpr.expr_false != NULL) { | ||
387 | Type *type_false = node->ifexpr.expr_false->expr_type; | ||
388 | if (type_is_numeric(type_true) && type_is_numeric(type_false)) { | ||
389 | node->expr_type = coerce_numeric_types(type_true, type_false); | ||
390 | } else if (!sv_equal(&type_true->name, &type_false->name)) { | ||
391 | push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_T_F, | ||
392 | node->line, node->col); | ||
393 | return false; | ||
394 | } | ||
395 | } | ||
396 | } break; | ||
397 | case NODE_SET: { | ||
398 | node->expr_type = &default_types[TYPE_VOID]; | ||
399 | if (!resolve_type(scope, node->set.symbol)) { | ||
400 | return false; | ||
401 | } | ||
402 | if (!resolve_type(scope, node->set.value)) { | ||
403 | return false; | ||
404 | } | ||
405 | Node *symbol = node->set.symbol; | ||
406 | Node *value = node->set.value; | ||
407 | if (!sv_equal(&symbol->expr_type->name, &value->expr_type->name)) { | ||
408 | push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH, | ||
409 | node->line, node->col); | ||
410 | return false; | ||
411 | } | ||
412 | } break; | ||
413 | case NODE_DEF: { | ||
414 | // Prepare value for symbol table. | ||
415 | Symbol *var = alloc_symval(node->def.symbol, SYMBOL_VAR); | ||
416 | var->var.type = node->def.type; | ||
417 | if (!insert_symbol(scope, node->def.symbol, var)) { | ||
418 | return false; | ||
419 | } | ||
420 | |||
421 | Type *type = find_type(scope, node->def.type); | ||
422 | if (type == NULL) { | ||
423 | return false; | ||
424 | } | ||
425 | node->def.symbol->expr_type = type; | ||
426 | |||
427 | node->expr_type = &default_types[TYPE_VOID]; | ||
428 | // TODO: type inference from right side when not annotated? | ||
429 | if (!resolve_type(scope, node->def.value)) { | ||
430 | return false; | ||
431 | } | ||
432 | Node *symbol = node->def.symbol; | ||
433 | Node *value = node->def.value; | ||
434 | if (!sv_equal(&symbol->expr_type->name, &value->expr_type->name)) { | ||
435 | push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH, | ||
436 | node->line, node->col); | ||
437 | return false; | ||
438 | } | ||
439 | } break; | ||
440 | case NODE_NUMBER: { | ||
441 | // TODO: Numbers are f64/s64 unless explicitely annotated. Annotated | ||
442 | // numbers must fit in the given range (e.g. no negative constants | ||
443 | // inside a u64, no numbers bigger than 255 in a u8, etc.). | ||
444 | if (node->number.fractional != 0) { | ||
445 | node->expr_type = &default_types[TYPE_F64]; | ||
446 | } else { | ||
447 | node->expr_type = &default_types[TYPE_S64]; | ||
448 | } | ||
449 | } break; | ||
450 | case NODE_BOOL: { | ||
451 | node->expr_type = &default_types[TYPE_BOOL]; | ||
452 | } break; | ||
453 | case NODE_STRING: { | ||
454 | node->expr_type = &default_types[TYPE_STR]; | ||
455 | } break; | ||
456 | case NODE_FUNCALL: { | ||
457 | Symbol *val = find_symbol(scope, node->funcall.name); | ||
458 | if (!resolve_type(scope, node->funcall.name)) { | ||
459 | return false; | ||
460 | } | ||
461 | if (val->type != SYMBOL_FUN) { | ||
462 | push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_FUN, | ||
463 | node->funcall.name->line, node->funcall.name->col); | ||
464 | return false; | ||
465 | } | ||
466 | if (array_size(node->funcall.args) != array_size(val->fun.param_types)) { | ||
467 | push_error(ERR_TYPE_PARSER, ERR_BAD_ARGS, node->line, node->col); | ||
468 | return false; | ||
469 | } | ||
470 | node->expr_type = node->funcall.name->expr_type; | ||
471 | for (size_t i = 0; i < array_size(node->funcall.args); ++i) { | ||
472 | Node *arg = node->funcall.args[i]; | ||
473 | if (!resolve_type(scope, arg)) { | ||
474 | return false; | ||
475 | } | ||
476 | Node *expected = val->fun.param_types[i]; | ||
477 | if (!sv_equal(&arg->expr_type->name, &expected->string)) { | ||
478 | push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH, | ||
479 | arg->line, arg->col); | ||
480 | return false; | ||
481 | } | ||
482 | } | ||
483 | } break; | ||
484 | default: break; | ||
485 | } | ||
486 | return true; | ||
487 | } | ||
488 | |||
489 | ParseTree * | ||
490 | semantic_analysis(Root *roots) { | ||
491 | ParseTree *parse_tree = malloc(sizeof(ParseTree)); | ||
492 | parse_tree->roots = roots; | ||
493 | parse_tree->global_scope = alloc_scope(NULL); | ||
494 | parse_tree->current_scope = parse_tree->global_scope; | ||
495 | |||
496 | // Fill global scope with default types. | ||
497 | HashTable *types = parse_tree->global_scope->types; | ||
498 | for (size_t i = 0; i < sizeof(default_types)/sizeof(Type); ++i) { | ||
499 | Type *type = &default_types[i]; | ||
500 | ht_insert(types, &type->name, type); | ||
501 | } | ||
502 | |||
503 | // Fill up global function symbols. | ||
504 | Scope *scope = parse_tree->global_scope; | ||
505 | for (size_t i = 0; i < array_size(parse_tree->roots); ++i) { | ||
506 | Node *root = parse_tree->roots[i]; | ||
507 | if (root->type == NODE_FUN) { | ||
508 | Node *name = root->fun.name; | ||
509 | Symbol *fun = alloc_symval(root->fun.name, SYMBOL_FUN); | ||
510 | fun->fun.param_types = root->fun.param_types; | ||
511 | fun->fun.return_type = root->fun.return_type; | ||
512 | if (!insert_symbol(scope, name, fun)) { | ||
513 | return NULL; | ||
514 | } | ||
515 | } | ||
516 | } | ||
517 | |||
518 | for (size_t i = 0; i < array_size(parse_tree->roots); ++i) { | ||
519 | // Fill up symbol tables in proper scope and resolve type of expression | ||
520 | // for all elements. | ||
521 | if (!resolve_type(scope, parse_tree->roots[i])) { | ||
522 | return NULL; | ||
523 | } | ||
524 | } | ||
525 | |||
526 | return parse_tree; | ||
527 | } | ||