diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-21 23:40:03 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-21 23:40:03 +0200 |
commit | 8931a6f22b9586c62082c525ec8b6de62c7de5d5 (patch) | |
tree | 8ab44ee3619893ae8f8acc195f9ac890710918cd | |
parent | 835f4d9f23f55a973d76ae9384b7b9d75da5472b (diff) | |
download | bdl-8931a6f22b9586c62082c525ec8b6de62c7de5d5.tar.gz bdl-8931a6f22b9586c62082c525ec8b6de62c7de5d5.zip |
Start implementing the typechecker
-rw-r--r-- | src/main.c | 160 | ||||
-rw-r--r-- | src/parser.c | 4 | ||||
-rw-r--r-- | tests/semantics.bad | 120 |
3 files changed, 225 insertions, 59 deletions
@@ -6,6 +6,15 @@ | |||
6 | #include "lexer.c" | 6 | #include "lexer.c" |
7 | #include "parser.c" | 7 | #include "parser.c" |
8 | #include "vm.c" | 8 | #include "vm.c" |
9 | // TODO: To typecheck IF we need to make sure it always returns a value if we | ||
10 | // are using it as an expression. Not as straightforward as I initially | ||
11 | // imagined! An alternative is implicit zero value in the previous case: | ||
12 | // | ||
13 | // let a = if (expr) 123 | ||
14 | // | ||
15 | // If the expression evalueates to `true`, a will be 123, otherwise, 0. | ||
16 | // | ||
17 | // This pattern seems quite common in practice no? | ||
9 | 18 | ||
10 | typedef enum ExecMode { | 19 | typedef enum ExecMode { |
11 | RUN_NORMAL, | 20 | RUN_NORMAL, |
@@ -156,6 +165,21 @@ graph_symbols(Scope **scopes, Arena a) { | |||
156 | 165 | ||
157 | void | 166 | void |
158 | analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | 167 | analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { |
168 | // TODO: Struct field initializers need to be checked... when we | ||
169 | // get to that. Note that for checking symbol chains, we need to | ||
170 | // first perform type analysis to know the variable is a certain | ||
171 | // type: | ||
172 | // | ||
173 | // struct T { | ||
174 | // field_a: int | ||
175 | // field_b: int | ||
176 | // } | ||
177 | // let a:my_struct | ||
178 | // set a.field_a = 1 | ||
179 | // | ||
180 | // In this case we need to know that variable a is of type `T` which | ||
181 | // contains fields `field_a` and `field_b`. | ||
182 | // | ||
159 | assert(a); | 183 | assert(a); |
160 | assert(scope); | 184 | assert(scope); |
161 | if (!node) { | 185 | if (!node) { |
@@ -214,12 +238,8 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
214 | analyzer_symbols(a, expr, scope); | 238 | analyzer_symbols(a, expr, scope); |
215 | } | 239 | } |
216 | } break; | 240 | } break; |
217 | // TODO: Struct field initializers need to be checked... when we | 241 | // TODO: |
218 | // get to that. | 242 | // case NODE_STRUCT_LIT: |
219 | // case NODE_VAL_FIELD: | ||
220 | // case NODE_ENUM: | ||
221 | // case NODE_STRUCT: | ||
222 | // case NODE_STRUCT_LIT: | ||
223 | case NODE_BLOCK: { | 243 | case NODE_BLOCK: { |
224 | Scope *next = scope_alloc(a, scope); | 244 | Scope *next = scope_alloc(a, scope); |
225 | node->scope = scope; | 245 | node->scope = scope; |
@@ -367,6 +387,126 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
367 | } | 387 | } |
368 | 388 | ||
369 | void | 389 | void |
390 | propagate_type(Analyzer *a, Node *node, Scope *scope) { | ||
391 | if (!node) { | ||
392 | return; | ||
393 | } | ||
394 | switch (node->kind) { | ||
395 | // case NODE_LET: { | ||
396 | // } break; | ||
397 | // Binary ops | ||
398 | case NODE_NUM_INT: | ||
399 | case NODE_NUM_UINT: { | ||
400 | // Check if the terminal correspond to an integer numeric type. | ||
401 | } break; | ||
402 | case NODE_ADD: | ||
403 | case NODE_SUB: | ||
404 | case NODE_DIV: | ||
405 | case NODE_MUL: | ||
406 | case NODE_MOD: | ||
407 | case NODE_NOT: | ||
408 | case NODE_AND: | ||
409 | case NODE_OR: | ||
410 | case NODE_EQ: | ||
411 | case NODE_NEQ: | ||
412 | case NODE_LT: | ||
413 | case NODE_GT: | ||
414 | case NODE_LE: | ||
415 | case NODE_GE: | ||
416 | case NODE_BITNOT: | ||
417 | case NODE_BITAND: | ||
418 | case NODE_BITOR: | ||
419 | case NODE_BITLSHIFT: | ||
420 | case NODE_BITRSHIFT: { | ||
421 | if (node->left) { | ||
422 | node->left->type = node->type; | ||
423 | propagate_type(a, node->left, scope); | ||
424 | } | ||
425 | if (node->right) { | ||
426 | node->right->type = node->type; | ||
427 | propagate_type(a, node->right, scope); | ||
428 | } | ||
429 | } break; | ||
430 | default: break; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | void | ||
435 | analyzer_typecheck(Analyzer *a, Node *node, Scope *scope) { | ||
436 | assert(a); | ||
437 | assert(scope); | ||
438 | if (!node) { | ||
439 | return; | ||
440 | } | ||
441 | switch (node->kind) { | ||
442 | case NODE_NUM_INT: | ||
443 | case NODE_NUM_UINT: { | ||
444 | // TODO: Check if the terminal correspond to an integer numeric type. | ||
445 | printf("DING\n"); | ||
446 | } break; | ||
447 | case NODE_LET: { | ||
448 | // Check the value first to avoid recursive symbol usage. | ||
449 | // analyzer_symbols(a, node->var_val, scope); | ||
450 | |||
451 | if (node->var_type) { | ||
452 | Str type = node->var_type->value.str; | ||
453 | // TODO: register type on symbol table? | ||
454 | // println("found let type: %s", type); | ||
455 | // Propagate down. | ||
456 | if (node->var_val) { | ||
457 | node->var_val->type = type; | ||
458 | propagate_type(a, node->var_val, scope); | ||
459 | analyzer_typecheck(a, node->var_val, scope); | ||
460 | } | ||
461 | return; | ||
462 | } | ||
463 | // TODO: perform inference | ||
464 | if (node->var_val) { | ||
465 | // analyzer_typecheck(a, node->var_val, scope); | ||
466 | } | ||
467 | // TODO: Type check | ||
468 | // if (symmap_lookup(&scope->symbols, symbol) != NULL) { | ||
469 | // eprintln( | ||
470 | // "%s:%d:%d: error: symbol '%s' already exists in current " | ||
471 | // " scope ", | ||
472 | // a->file_name, node->var_name->line, node->var_name->col, | ||
473 | // symbol); | ||
474 | // } | ||
475 | // symmap_insert(&scope->symbols, symbol, | ||
476 | // (Symbol){.kind = SYM_VAR, .name = symbol}, | ||
477 | // a->storage); | ||
478 | } break; | ||
479 | case NODE_ADD: | ||
480 | case NODE_SUB: | ||
481 | case NODE_DIV: | ||
482 | case NODE_MUL: | ||
483 | case NODE_MOD: | ||
484 | case NODE_NOT: | ||
485 | case NODE_AND: | ||
486 | case NODE_OR: | ||
487 | case NODE_EQ: | ||
488 | case NODE_NEQ: | ||
489 | case NODE_LT: | ||
490 | case NODE_GT: | ||
491 | case NODE_LE: | ||
492 | case NODE_GE: | ||
493 | case NODE_BITNOT: | ||
494 | case NODE_BITAND: | ||
495 | case NODE_BITOR: | ||
496 | case NODE_BITLSHIFT: | ||
497 | case NODE_BITRSHIFT: { | ||
498 | if (node->left) { | ||
499 | analyzer_typecheck(a, node->left, scope); | ||
500 | } | ||
501 | if (node->right) { | ||
502 | analyzer_typecheck(a, node->right, scope); | ||
503 | } | ||
504 | } break; | ||
505 | default: break; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | void | ||
370 | symbolic_analysis(Analyzer *a, Parser *parser) { | 510 | symbolic_analysis(Analyzer *a, Parser *parser) { |
371 | Scope *scope = scope_alloc(a, NULL); | 511 | Scope *scope = scope_alloc(a, NULL); |
372 | assert(a); | 512 | assert(a); |
@@ -407,6 +547,10 @@ symbolic_analysis(Analyzer *a, Parser *parser) { | |||
407 | Node *root = parser->nodes[i]; | 547 | Node *root = parser->nodes[i]; |
408 | analyzer_symbols(a, root, scope); | 548 | analyzer_symbols(a, root, scope); |
409 | } | 549 | } |
550 | for (sz i = 0; i < array_size(parser->nodes); i++) { | ||
551 | Node *root = parser->nodes[i]; | ||
552 | analyzer_typecheck(a, root, scope); | ||
553 | } | ||
410 | } | 554 | } |
411 | 555 | ||
412 | void | 556 | void |
@@ -484,6 +628,10 @@ process_file(Str path) { | |||
484 | if (mode == PRINT_SYMTABLES) { | 628 | if (mode == PRINT_SYMTABLES) { |
485 | graph_symbols(analyzer.scopes, lexer_arena); | 629 | graph_symbols(analyzer.scopes, lexer_arena); |
486 | } | 630 | } |
631 | if (mode == PRINT_SEMANTIC) { | ||
632 | graph_ast(parser.nodes); | ||
633 | goto stop; | ||
634 | } | ||
487 | 635 | ||
488 | #if DEBUG == 1 | 636 | #if DEBUG == 1 |
489 | for (sz i = 0; i < array_size(analyzer.scopes); i++) { | 637 | for (sz i = 0; i < array_size(analyzer.scopes); i++) { |
diff --git a/src/parser.c b/src/parser.c index 67fc739..7864264 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -193,6 +193,7 @@ typedef struct Node { | |||
193 | }; | 193 | }; |
194 | bool is_ptr; | 194 | bool is_ptr; |
195 | struct Scope *scope; | 195 | struct Scope *scope; |
196 | Str type; | ||
196 | } Node; | 197 | } Node; |
197 | 198 | ||
198 | // | 199 | // |
@@ -1018,6 +1019,9 @@ graph_node(Node *node) { | |||
1018 | } break; | 1019 | } break; |
1019 | default: break; | 1020 | default: break; |
1020 | } | 1021 | } |
1022 | if (node->type.size > 0) { | ||
1023 | print("| Type: %s", node->type); | ||
1024 | } | ||
1021 | println("\"];"); | 1025 | println("\"];"); |
1022 | 1026 | ||
1023 | switch (node->kind) { | 1027 | switch (node->kind) { |
diff --git a/tests/semantics.bad b/tests/semantics.bad index d050426..c331ebc 100644 --- a/tests/semantics.bad +++ b/tests/semantics.bad | |||
@@ -1,66 +1,80 @@ | |||
1 | fun nested(): u32 { | 1 | let a:u16 = (1 + 2 * 2) / 2 |
2 | fun adder(a: u32, b: u32): u32 a + b | 2 | ; enum test { |
3 | if (1 + 1) { | 3 | ; a = 1 |
4 | { | 4 | ; b |
5 | let b = 32 | 5 | ; } |
6 | } | 6 | ; ; Resolve struct accessors. |
7 | } | 7 | ; struct my_struct { |
8 | adder(1,2) | 8 | ; field_a: int |
9 | } | 9 | ; field_b: int |
10 | ; } | ||
10 | 11 | ||
11 | enum field { | 12 | ; let a:my_struct |
12 | a | 13 | ; set a.field_a = 1 |
13 | b | ||
14 | } | ||
15 | 14 | ||
16 | struct vec { | 15 | ; fun nested(): u32 { |
17 | a: int | 16 | ; fun adder(a: u32, b: u32): u32 a + b |
18 | b: int | 17 | ; if (1 + 1) { |
19 | } | 18 | ; { |
19 | ; let b = 32 | ||
20 | ; } | ||
21 | ; } | ||
22 | ; adder(1,2) | ||
23 | ; } | ||
20 | 24 | ||
21 | fun foo(): nil { | 25 | ; enum field { |
22 | bar() | 26 | ; a |
23 | } | 27 | ; b |
28 | ; } | ||
24 | 29 | ||
25 | fun bar(): nil { | 30 | ; struct vec { |
26 | foo() | 31 | ; a: int |
27 | } | 32 | ; b: int |
33 | ; } | ||
28 | 34 | ||
29 | ; There are some builtint functions. | 35 | ; fun foo(): nil { |
30 | println("hello world") | 36 | ; bar() |
37 | ; } | ||
31 | 38 | ||
32 | ; Let/set. | 39 | ; fun bar(): nil { |
33 | let num = 1 | 40 | ; foo() |
34 | set num = 2 | 41 | ; } |
35 | set num = 0 + num | 42 | |
43 | ; ; There are some builtint functions. | ||
44 | ; println("hello world") | ||
45 | |||
46 | ; ; Let/set. | ||
47 | ; let num = 1 | ||
48 | ; set num = 2 | ||
49 | ; set num = 0 + num | ||
36 | 50 | ||
37 | ; Loops and conditionals. | 51 | ; ; Loops and conditionals. |
38 | if (num) 3 else 1 | 52 | ; if (num) 3 else 1 |
39 | let val = if (2 + num == 4) num else num | 53 | ; let val = if (2 + num == 4) num else num |
40 | if (true) { | 54 | ; if (true) { |
41 | let c = 1 | 55 | ; let c = 1 |
42 | 1 + 1 + c | 56 | ; 1 + 1 + c |
43 | } | 57 | ; } |
44 | 58 | ||
45 | fun testmatches(): nil { | 59 | ; fun testmatches(): nil { |
46 | match (num) { | 60 | ; match (num) { |
47 | case 1 = 23 | 61 | ; case 1 = 23 |
48 | case 2 = num | 62 | ; case 2 = num |
49 | else = num | 63 | ; else = num |
50 | } | 64 | ; } |
51 | 65 | ||
52 | cond { | 66 | ; cond { |
53 | case 1 == 1 = 23 | 67 | ; case 1 == 1 = 23 |
54 | case 2 != 1 = num | 68 | ; case 2 != 1 = num |
55 | else = num | 69 | ; else = num |
56 | } | 70 | ; } |
57 | } | 71 | ; } |
58 | 72 | ||
59 | while (num == 1) num | 73 | ; while (num == 1) num |
60 | while (true) { | 74 | ; while (true) { |
61 | let c = 1 | 75 | ; let c = 1 |
62 | 1 + 1 + c | 76 | ; 1 + 1 + c |
63 | } | 77 | ; } |
64 | 78 | ||
65 | ; This should err. | 79 | ; This should err. |
66 | ; fun foo(): nil { | 80 | ; fun foo(): nil { |