diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-21 23:40:03 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-21 23:40:03 +0200 |
commit | 8931a6f22b9586c62082c525ec8b6de62c7de5d5 (patch) | |
tree | 8ab44ee3619893ae8f8acc195f9ac890710918cd /src/main.c | |
parent | 835f4d9f23f55a973d76ae9384b7b9d75da5472b (diff) | |
download | bdl-8931a6f22b9586c62082c525ec8b6de62c7de5d5.tar.gz bdl-8931a6f22b9586c62082c525ec8b6de62c7de5d5.zip |
Start implementing the typechecker
Diffstat (limited to 'src/main.c')
-rw-r--r-- | src/main.c | 160 |
1 files changed, 154 insertions, 6 deletions
@@ -6,6 +6,15 @@ | |||
6 | #include "lexer.c" | 6 | #include "lexer.c" |
7 | #include "parser.c" | 7 | #include "parser.c" |
8 | #include "vm.c" | 8 | #include "vm.c" |
9 | // TODO: To typecheck IF we need to make sure it always returns a value if we | ||
10 | // are using it as an expression. Not as straightforward as I initially | ||
11 | // imagined! An alternative is implicit zero value in the previous case: | ||
12 | // | ||
13 | // let a = if (expr) 123 | ||
14 | // | ||
15 | // If the expression evalueates to `true`, a will be 123, otherwise, 0. | ||
16 | // | ||
17 | // This pattern seems quite common in practice no? | ||
9 | 18 | ||
10 | typedef enum ExecMode { | 19 | typedef enum ExecMode { |
11 | RUN_NORMAL, | 20 | RUN_NORMAL, |
@@ -156,6 +165,21 @@ graph_symbols(Scope **scopes, Arena a) { | |||
156 | 165 | ||
157 | void | 166 | void |
158 | analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | 167 | analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { |
168 | // TODO: Struct field initializers need to be checked... when we | ||
169 | // get to that. Note that for checking symbol chains, we need to | ||
170 | // first perform type analysis to know the variable is a certain | ||
171 | // type: | ||
172 | // | ||
173 | // struct T { | ||
174 | // field_a: int | ||
175 | // field_b: int | ||
176 | // } | ||
177 | // let a:my_struct | ||
178 | // set a.field_a = 1 | ||
179 | // | ||
180 | // In this case we need to know that variable a is of type `T` which | ||
181 | // contains fields `field_a` and `field_b`. | ||
182 | // | ||
159 | assert(a); | 183 | assert(a); |
160 | assert(scope); | 184 | assert(scope); |
161 | if (!node) { | 185 | if (!node) { |
@@ -214,12 +238,8 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
214 | analyzer_symbols(a, expr, scope); | 238 | analyzer_symbols(a, expr, scope); |
215 | } | 239 | } |
216 | } break; | 240 | } break; |
217 | // TODO: Struct field initializers need to be checked... when we | 241 | // TODO: |
218 | // get to that. | 242 | // case NODE_STRUCT_LIT: |
219 | // case NODE_VAL_FIELD: | ||
220 | // case NODE_ENUM: | ||
221 | // case NODE_STRUCT: | ||
222 | // case NODE_STRUCT_LIT: | ||
223 | case NODE_BLOCK: { | 243 | case NODE_BLOCK: { |
224 | Scope *next = scope_alloc(a, scope); | 244 | Scope *next = scope_alloc(a, scope); |
225 | node->scope = scope; | 245 | node->scope = scope; |
@@ -367,6 +387,126 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
367 | } | 387 | } |
368 | 388 | ||
369 | void | 389 | void |
390 | propagate_type(Analyzer *a, Node *node, Scope *scope) { | ||
391 | if (!node) { | ||
392 | return; | ||
393 | } | ||
394 | switch (node->kind) { | ||
395 | // case NODE_LET: { | ||
396 | // } break; | ||
397 | // Binary ops | ||
398 | case NODE_NUM_INT: | ||
399 | case NODE_NUM_UINT: { | ||
400 | // Check if the terminal correspond to an integer numeric type. | ||
401 | } break; | ||
402 | case NODE_ADD: | ||
403 | case NODE_SUB: | ||
404 | case NODE_DIV: | ||
405 | case NODE_MUL: | ||
406 | case NODE_MOD: | ||
407 | case NODE_NOT: | ||
408 | case NODE_AND: | ||
409 | case NODE_OR: | ||
410 | case NODE_EQ: | ||
411 | case NODE_NEQ: | ||
412 | case NODE_LT: | ||
413 | case NODE_GT: | ||
414 | case NODE_LE: | ||
415 | case NODE_GE: | ||
416 | case NODE_BITNOT: | ||
417 | case NODE_BITAND: | ||
418 | case NODE_BITOR: | ||
419 | case NODE_BITLSHIFT: | ||
420 | case NODE_BITRSHIFT: { | ||
421 | if (node->left) { | ||
422 | node->left->type = node->type; | ||
423 | propagate_type(a, node->left, scope); | ||
424 | } | ||
425 | if (node->right) { | ||
426 | node->right->type = node->type; | ||
427 | propagate_type(a, node->right, scope); | ||
428 | } | ||
429 | } break; | ||
430 | default: break; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | void | ||
435 | analyzer_typecheck(Analyzer *a, Node *node, Scope *scope) { | ||
436 | assert(a); | ||
437 | assert(scope); | ||
438 | if (!node) { | ||
439 | return; | ||
440 | } | ||
441 | switch (node->kind) { | ||
442 | case NODE_NUM_INT: | ||
443 | case NODE_NUM_UINT: { | ||
444 | // TODO: Check if the terminal correspond to an integer numeric type. | ||
445 | printf("DING\n"); | ||
446 | } break; | ||
447 | case NODE_LET: { | ||
448 | // Check the value first to avoid recursive symbol usage. | ||
449 | // analyzer_symbols(a, node->var_val, scope); | ||
450 | |||
451 | if (node->var_type) { | ||
452 | Str type = node->var_type->value.str; | ||
453 | // TODO: register type on symbol table? | ||
454 | // println("found let type: %s", type); | ||
455 | // Propagate down. | ||
456 | if (node->var_val) { | ||
457 | node->var_val->type = type; | ||
458 | propagate_type(a, node->var_val, scope); | ||
459 | analyzer_typecheck(a, node->var_val, scope); | ||
460 | } | ||
461 | return; | ||
462 | } | ||
463 | // TODO: perform inference | ||
464 | if (node->var_val) { | ||
465 | // analyzer_typecheck(a, node->var_val, scope); | ||
466 | } | ||
467 | // TODO: Type check | ||
468 | // if (symmap_lookup(&scope->symbols, symbol) != NULL) { | ||
469 | // eprintln( | ||
470 | // "%s:%d:%d: error: symbol '%s' already exists in current " | ||
471 | // " scope ", | ||
472 | // a->file_name, node->var_name->line, node->var_name->col, | ||
473 | // symbol); | ||
474 | // } | ||
475 | // symmap_insert(&scope->symbols, symbol, | ||
476 | // (Symbol){.kind = SYM_VAR, .name = symbol}, | ||
477 | // a->storage); | ||
478 | } break; | ||
479 | case NODE_ADD: | ||
480 | case NODE_SUB: | ||
481 | case NODE_DIV: | ||
482 | case NODE_MUL: | ||
483 | case NODE_MOD: | ||
484 | case NODE_NOT: | ||
485 | case NODE_AND: | ||
486 | case NODE_OR: | ||
487 | case NODE_EQ: | ||
488 | case NODE_NEQ: | ||
489 | case NODE_LT: | ||
490 | case NODE_GT: | ||
491 | case NODE_LE: | ||
492 | case NODE_GE: | ||
493 | case NODE_BITNOT: | ||
494 | case NODE_BITAND: | ||
495 | case NODE_BITOR: | ||
496 | case NODE_BITLSHIFT: | ||
497 | case NODE_BITRSHIFT: { | ||
498 | if (node->left) { | ||
499 | analyzer_typecheck(a, node->left, scope); | ||
500 | } | ||
501 | if (node->right) { | ||
502 | analyzer_typecheck(a, node->right, scope); | ||
503 | } | ||
504 | } break; | ||
505 | default: break; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | void | ||
370 | symbolic_analysis(Analyzer *a, Parser *parser) { | 510 | symbolic_analysis(Analyzer *a, Parser *parser) { |
371 | Scope *scope = scope_alloc(a, NULL); | 511 | Scope *scope = scope_alloc(a, NULL); |
372 | assert(a); | 512 | assert(a); |
@@ -407,6 +547,10 @@ symbolic_analysis(Analyzer *a, Parser *parser) { | |||
407 | Node *root = parser->nodes[i]; | 547 | Node *root = parser->nodes[i]; |
408 | analyzer_symbols(a, root, scope); | 548 | analyzer_symbols(a, root, scope); |
409 | } | 549 | } |
550 | for (sz i = 0; i < array_size(parser->nodes); i++) { | ||
551 | Node *root = parser->nodes[i]; | ||
552 | analyzer_typecheck(a, root, scope); | ||
553 | } | ||
410 | } | 554 | } |
411 | 555 | ||
412 | void | 556 | void |
@@ -484,6 +628,10 @@ process_file(Str path) { | |||
484 | if (mode == PRINT_SYMTABLES) { | 628 | if (mode == PRINT_SYMTABLES) { |
485 | graph_symbols(analyzer.scopes, lexer_arena); | 629 | graph_symbols(analyzer.scopes, lexer_arena); |
486 | } | 630 | } |
631 | if (mode == PRINT_SEMANTIC) { | ||
632 | graph_ast(parser.nodes); | ||
633 | goto stop; | ||
634 | } | ||
487 | 635 | ||
488 | #if DEBUG == 1 | 636 | #if DEBUG == 1 |
489 | for (sz i = 0; i < array_size(analyzer.scopes); i++) { | 637 | for (sz i = 0; i < array_size(analyzer.scopes); i++) { |