diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-23 20:21:25 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-23 20:21:25 +0200 |
commit | caf4d4c7dd0fff6cdf69bf8cb27f3bbb6d02a366 (patch) | |
tree | a434ad3519f9a2e14a5db343754dc7960bc82d32 | |
parent | c6fd7856bfe5dd0567f672d0d1a70a3b698feaa4 (diff) | |
download | bdl-caf4d4c7dd0fff6cdf69bf8cb27f3bbb6d02a366.tar.gz bdl-caf4d4c7dd0fff6cdf69bf8cb27f3bbb6d02a366.zip |
Change typechecking to be independent of the symbolic checking
-rw-r--r-- | src/main.c | 158 | ||||
-rw-r--r-- | tests/semantics.bad | 16 |
2 files changed, 122 insertions, 52 deletions
@@ -70,15 +70,20 @@ typedef struct Scope { | |||
70 | sz id; | 70 | sz id; |
71 | sz depth; | 71 | sz depth; |
72 | SymbolMap *symbols; | 72 | SymbolMap *symbols; |
73 | TypeMap *types; | ||
74 | struct Scope *parent; | 73 | struct Scope *parent; |
75 | } Scope; | 74 | } Scope; |
76 | 75 | ||
76 | typedef struct TypeScope { | ||
77 | TypeMap *types; | ||
78 | struct TypeScope *parent; | ||
79 | } TypeScope; | ||
80 | |||
77 | typedef struct Analyzer { | 81 | typedef struct Analyzer { |
78 | Arena *storage; | 82 | Arena *storage; |
79 | Str file_name; | 83 | Str file_name; |
80 | sz scope_gen; | 84 | sz scope_gen; |
81 | Scope **scopes; | 85 | Scope **scopes; |
86 | TypeScope **types; | ||
82 | StrSet *numeric_types; | 87 | StrSet *numeric_types; |
83 | StrSet *integer_types; | 88 | StrSet *integer_types; |
84 | } Analyzer; | 89 | } Analyzer; |
@@ -94,6 +99,14 @@ scope_alloc(Analyzer *a, Scope *parent) { | |||
94 | return scope; | 99 | return scope; |
95 | } | 100 | } |
96 | 101 | ||
102 | TypeScope * | ||
103 | typescope_alloc(Analyzer *a, TypeScope *parent) { | ||
104 | TypeScope *scope = arena_calloc(sizeof(TypeScope), a->storage); | ||
105 | scope->parent = parent; | ||
106 | array_push(a->types, scope, a->storage); | ||
107 | return scope; | ||
108 | } | ||
109 | |||
97 | SymbolMap * | 110 | SymbolMap * |
98 | find_symbol(Scope *scope, Str symbol) { | 111 | find_symbol(Scope *scope, Str symbol) { |
99 | while (scope != NULL) { | 112 | while (scope != NULL) { |
@@ -107,7 +120,7 @@ find_symbol(Scope *scope, Str symbol) { | |||
107 | } | 120 | } |
108 | 121 | ||
109 | TypeMap * | 122 | TypeMap * |
110 | find_type(Scope *scope, Str type) { | 123 | find_type(TypeScope *scope, Str type) { |
111 | while (scope != NULL) { | 124 | while (scope != NULL) { |
112 | TypeMap *val = typemap_lookup(&scope->types, type); | 125 | TypeMap *val = typemap_lookup(&scope->types, type); |
113 | if (val != NULL) { | 126 | if (val != NULL) { |
@@ -135,10 +148,6 @@ graph_scope(Scope *scope, Arena a) { | |||
135 | "</TD><TD ALIGN=\"left\" > TYPE </TD></TR>"); | 148 | "</TD><TD ALIGN=\"left\" > TYPE </TD></TR>"); |
136 | while (sym) { | 149 | while (sym) { |
137 | Str type_name = cstr("nil"); | 150 | Str type_name = cstr("nil"); |
138 | TypeMap *type = find_type(scope, sym->val.name); | ||
139 | if (type) { | ||
140 | type_name = type->val; | ||
141 | } | ||
142 | print( | 151 | print( |
143 | "<TR><TD ALIGN=\"left\"> %s </TD><TD ALIGN=\"left\"> %s </TD><TD " | 152 | "<TR><TD ALIGN=\"left\"> %s </TD><TD ALIGN=\"left\"> %s </TD><TD " |
144 | "ALIGN=\"left\"> %s </TD></TR>", | 153 | "ALIGN=\"left\"> %s </TD></TR>", |
@@ -193,14 +202,62 @@ emit_semantic_error(Analyzer *a, Node *n, Str msg) { | |||
193 | } | 202 | } |
194 | 203 | ||
195 | Type | 204 | Type |
196 | type_inference(Analyzer *a, Node *node, Scope *scope) { | 205 | type_inference(Analyzer *a, Node *node, TypeScope *scope) { |
197 | assert(a); | 206 | assert(a); |
198 | assert(scope); | 207 | assert(scope); |
199 | assert(node); | 208 | assert(node); |
200 | // NOTE: For now we are not going to do implicit numeric conversions. | 209 | // NOTE: For now we are not going to do implicit numeric conversions. |
201 | switch (node->kind) { | 210 | switch (node->kind) { |
211 | case NODE_LET: { | ||
212 | node->type = cstr("nil"); | ||
213 | Str symbol = node->var_name->value.str; | ||
214 | if (node->var_type) { | ||
215 | Str type_name = node->var_type->value.str; | ||
216 | TypeMap *type = find_type(scope, type_name); | ||
217 | if (type == NULL) { | ||
218 | eprintln("%s:%d:%d: error: unknown type '%s'", a->file_name, | ||
219 | node->var_type->line, node->var_type->col, | ||
220 | type_name); | ||
221 | return cstr(""); | ||
222 | } | ||
223 | if (node->var_val) { | ||
224 | Type type = type_inference(a, node->var_val, scope); | ||
225 | if (!type.size) { | ||
226 | eprintln( | ||
227 | "%s:%d:%d: error: can't bind `nil` to variable " | ||
228 | "'%s'", | ||
229 | a->file_name, node->var_type->line, | ||
230 | node->var_type->col, symbol); | ||
231 | return cstr(""); | ||
232 | } | ||
233 | // TODO: Consider compatible types. | ||
234 | if (!str_eq(type, type_name)) { | ||
235 | eprintln( | ||
236 | "%s:%d:%d: error: type mismatch, trying to assing " | ||
237 | "%s" | ||
238 | " to a variable of type %s", | ||
239 | a->file_name, node->var_type->line, | ||
240 | node->var_type->col, type, type_name); | ||
241 | return cstr(""); | ||
242 | } | ||
243 | } | ||
244 | typemap_insert(&scope->types, symbol, type->val, a->storage); | ||
245 | return node->type; | ||
246 | } | ||
247 | |||
248 | // We don't know the type for this symbol, perform inference. | ||
249 | Type type = type_inference(a, node->var_val, scope); | ||
250 | if (type.size) { | ||
251 | typemap_insert(&scope->types, symbol, type, a->storage); | ||
252 | node->var_name->type = type; | ||
253 | } | ||
254 | return node->type; | ||
255 | } break; | ||
202 | case NODE_TRUE: | 256 | case NODE_TRUE: |
203 | case NODE_FALSE: return cstr("bool"); | 257 | case NODE_FALSE: { |
258 | node->type = cstr("bool"); | ||
259 | return node->type; | ||
260 | } break; | ||
204 | case NODE_NOT: | 261 | case NODE_NOT: |
205 | case NODE_AND: | 262 | case NODE_AND: |
206 | case NODE_OR: { | 263 | case NODE_OR: { |
@@ -208,17 +265,18 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { | |||
208 | if (!str_eq(left, cstr("bool"))) { | 265 | if (!str_eq(left, cstr("bool"))) { |
209 | emit_semantic_error(a, node, | 266 | emit_semantic_error(a, node, |
210 | cstr("expected bool on logic expression")); | 267 | cstr("expected bool on logic expression")); |
211 | return (Type){0}; | 268 | return cstr(""); |
212 | } | 269 | } |
213 | if (node->right) { | 270 | if (node->right) { |
214 | Type right = type_inference(a, node->right, scope); | 271 | Type right = type_inference(a, node->right, scope); |
215 | if (!str_eq(right, cstr("bool"))) { | 272 | if (!str_eq(right, cstr("bool"))) { |
216 | emit_semantic_error( | 273 | emit_semantic_error( |
217 | a, node, cstr("expected bool on logic expression")); | 274 | a, node, cstr("expected bool on logic expression")); |
218 | return (Type){0}; | 275 | return cstr(""); |
219 | } | 276 | } |
220 | } | 277 | } |
221 | return cstr("bool"); | 278 | node->type = cstr("bool"); |
279 | return node->type; | ||
222 | } break; | 280 | } break; |
223 | case NODE_EQ: | 281 | case NODE_EQ: |
224 | case NODE_NEQ: | 282 | case NODE_NEQ: |
@@ -231,9 +289,10 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { | |||
231 | if (!str_eq(left, right)) { | 289 | if (!str_eq(left, right)) { |
232 | emit_semantic_error( | 290 | emit_semantic_error( |
233 | a, node, cstr("mismatched types on binary expression")); | 291 | a, node, cstr("mismatched types on binary expression")); |
234 | return (Type){0}; | 292 | return cstr(""); |
235 | } | 293 | } |
236 | return cstr("bool"); | 294 | node->type = cstr("bool"); |
295 | return node->type; | ||
237 | } break; | 296 | } break; |
238 | case NODE_BITNOT: | 297 | case NODE_BITNOT: |
239 | case NODE_BITAND: | 298 | case NODE_BITAND: |
@@ -246,13 +305,15 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { | |||
246 | !strset_lookup(&a->integer_types, right)) { | 305 | !strset_lookup(&a->integer_types, right)) { |
247 | emit_semantic_error( | 306 | emit_semantic_error( |
248 | a, node, cstr("non integer type on bit twiddling expr")); | 307 | a, node, cstr("non integer type on bit twiddling expr")); |
249 | return (Type){0}; | 308 | return cstr(""); |
250 | } | 309 | } |
251 | if (!str_eq(left, right)) { | 310 | if (!str_eq(left, right)) { |
252 | emit_semantic_error( | 311 | emit_semantic_error( |
253 | a, node, cstr("mismatched types on binary expression")); | 312 | a, node, cstr("mismatched types on binary expression")); |
254 | return (Type){0}; | 313 | return cstr(""); |
255 | } | 314 | } |
315 | node->type = left; | ||
316 | return node->type; | ||
256 | } break; | 317 | } break; |
257 | case NODE_ADD: | 318 | case NODE_ADD: |
258 | case NODE_SUB: | 319 | case NODE_SUB: |
@@ -265,31 +326,56 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { | |||
265 | !strset_lookup(&a->numeric_types, right)) { | 326 | !strset_lookup(&a->numeric_types, right)) { |
266 | emit_semantic_error( | 327 | emit_semantic_error( |
267 | a, node, cstr("non numeric type on arithmetic expr")); | 328 | a, node, cstr("non numeric type on arithmetic expr")); |
268 | return (Type){0}; | 329 | return cstr(""); |
269 | } | 330 | } |
270 | if (!str_eq(left, right)) { | 331 | if (!str_eq(left, right)) { |
271 | emit_semantic_error( | 332 | emit_semantic_error( |
272 | a, node, cstr("mismatched types on binary expression")); | 333 | a, node, cstr("mismatched types on binary expression")); |
273 | return (Type){0}; | 334 | return cstr(""); |
274 | } | 335 | } |
275 | return left; | 336 | node->type = left; |
337 | return node->type; | ||
338 | } break; | ||
339 | case NODE_NUM_UINT: { | ||
340 | node->type = cstr("uint"); | ||
341 | return node->type; | ||
342 | } break; | ||
343 | case NODE_NUM_INT: { | ||
344 | node->type = cstr("int"); | ||
345 | return node->type; | ||
346 | } break; | ||
347 | case NODE_NUM_FLOAT: { | ||
348 | node->type = cstr("f64"); | ||
349 | return node->type; | ||
350 | } break; | ||
351 | case NODE_STRING: { | ||
352 | node->type = cstr("str"); | ||
353 | return node->type; | ||
276 | } break; | 354 | } break; |
277 | case NODE_NUM_UINT: return cstr("uint"); break; | ||
278 | case NODE_NUM_INT: return cstr("int"); break; | ||
279 | case NODE_NUM_FLOAT: return cstr("f64"); break; | ||
280 | case NODE_SYMBOL: { | 355 | case NODE_SYMBOL: { |
281 | TypeMap *type = find_type(scope, node->value.str); | 356 | TypeMap *type = find_type(scope, node->value.str); |
282 | if (type) { | 357 | if (type) { |
283 | return type->val; | 358 | node->type = type->val; |
359 | return node->type; | ||
284 | } | 360 | } |
285 | } break; | 361 | } break; |
362 | case NODE_BLOCK: { | ||
363 | scope = typescope_alloc(a, scope); | ||
364 | Type type; | ||
365 | for (sz i = 0; i < array_size(node->elements); i++) { | ||
366 | Node *expr = node->elements[i]; | ||
367 | type = type_inference(a, expr, scope); | ||
368 | } | ||
369 | node->type = type; | ||
370 | return node->type; | ||
371 | } break; | ||
286 | default: { | 372 | default: { |
287 | emit_semantic_error(a, node, | 373 | emit_semantic_error(a, node, |
288 | cstr("type inference not implemented for this " | 374 | cstr("type inference not implemented for this " |
289 | "kind of expression")); | 375 | "kind of expression")); |
290 | } break; | 376 | } break; |
291 | } | 377 | } |
292 | return (Type){0}; | 378 | return cstr(""); |
293 | } | 379 | } |
294 | 380 | ||
295 | void | 381 | void |
@@ -492,28 +578,6 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
492 | symmap_insert(&scope->symbols, symbol, | 578 | symmap_insert(&scope->symbols, symbol, |
493 | (Symbol){.kind = SYM_VAR, .name = symbol}, | 579 | (Symbol){.kind = SYM_VAR, .name = symbol}, |
494 | a->storage); | 580 | a->storage); |
495 | |||
496 | if (node->var_type) { | ||
497 | Str type_name = node->var_type->value.str; | ||
498 | TypeMap *type = find_type(scope, type_name); | ||
499 | if (type == NULL) { | ||
500 | eprintln("%s:%d:%d: error: unknown type '%s'", a->file_name, | ||
501 | node->var_type->line, node->var_type->col, | ||
502 | type_name); | ||
503 | return; | ||
504 | } | ||
505 | typemap_insert(&scope->types, symbol, type->val, a->storage); | ||
506 | // TODO: Typecheck the expression if we have it. | ||
507 | // if (node->var_val) ... | ||
508 | return; | ||
509 | } | ||
510 | |||
511 | // We don't know the type for this symbol, perform inference. | ||
512 | Type type = type_inference(a, node->var_val, scope); | ||
513 | if (type.size) { | ||
514 | typemap_insert(&scope->types, symbol, type, a->storage); | ||
515 | node->var_name->type = type; | ||
516 | } | ||
517 | } break; | 581 | } break; |
518 | // Binary ops | 582 | // Binary ops |
519 | case NODE_ADD: | 583 | case NODE_ADD: |
@@ -545,6 +609,7 @@ analyzer_symbols(Analyzer *a, Node *node, Scope *scope) { | |||
545 | void | 609 | void |
546 | symbolic_analysis(Analyzer *a, Parser *parser) { | 610 | symbolic_analysis(Analyzer *a, Parser *parser) { |
547 | Scope *scope = scope_alloc(a, NULL); | 611 | Scope *scope = scope_alloc(a, NULL); |
612 | TypeScope *types = typescope_alloc(a, NULL); | ||
548 | assert(a); | 613 | assert(a); |
549 | assert(parser); | 614 | assert(parser); |
550 | assert(scope); | 615 | assert(scope); |
@@ -566,7 +631,7 @@ symbolic_analysis(Analyzer *a, Parser *parser) { | |||
566 | cstr("uint"), cstr("str"), cstr("bool"), cstr("nil")}; | 631 | cstr("uint"), cstr("str"), cstr("bool"), cstr("nil")}; |
567 | for (sz i = 0; i < LEN(builtin_types); i++) { | 632 | for (sz i = 0; i < LEN(builtin_types); i++) { |
568 | Type type = builtin_types[i]; | 633 | Type type = builtin_types[i]; |
569 | typemap_insert(&scope->types, type, type, a->storage); | 634 | typemap_insert(&types->types, type, type, a->storage); |
570 | } | 635 | } |
571 | Type numeric_types[] = { | 636 | Type numeric_types[] = { |
572 | cstr("u8"), cstr("s8"), cstr("u16"), cstr("s16"), cstr("u32"), | 637 | cstr("u8"), cstr("s8"), cstr("u16"), cstr("s16"), cstr("u32"), |
@@ -608,6 +673,7 @@ symbolic_analysis(Analyzer *a, Parser *parser) { | |||
608 | // Recursively fill symbol tables. | 673 | // Recursively fill symbol tables. |
609 | for (sz i = 0; i < array_size(parser->nodes); i++) { | 674 | for (sz i = 0; i < array_size(parser->nodes); i++) { |
610 | Node *root = parser->nodes[i]; | 675 | Node *root = parser->nodes[i]; |
676 | type_inference(a, root, types); | ||
611 | analyzer_symbols(a, root, scope); | 677 | analyzer_symbols(a, root, scope); |
612 | } | 678 | } |
613 | // for (sz i = 0; i < array_size(parser->nodes); i++) { | 679 | // for (sz i = 0; i < array_size(parser->nodes); i++) { |
diff --git a/tests/semantics.bad b/tests/semantics.bad index d1effe4..158cec6 100644 --- a/tests/semantics.bad +++ b/tests/semantics.bad | |||
@@ -1,12 +1,16 @@ | |||
1 | ; let a:f32 = (1.0 + 2.0 * 2.0) / 2.0 | 1 | ; let a:f32 = (1.0 + 2.0 * 2.0) / 2.0 |
2 | 2 | ||
3 | let a:int = (1 + 2 * 2) / 2 | 3 | let annotated:int = (1 + 2 * 2) / 2 |
4 | let b = 1 | 4 | let numbers = 1 |
5 | let c = b | 5 | let symbols = numbers |
6 | let d = 1 + 2 * 4 | 6 | let arith = 1 + 2 * 4 |
7 | let e = 1 <= 2 | 7 | let cmp = 1 <= 2 |
8 | let booleans = !true && false || (1 <= 2) | 8 | let logic = !true && false || (1 <= 2) |
9 | let bits = 0xff & 0b00001111 | 9 | let bits = 0xff & 0b00001111 |
10 | let block = { | ||
11 | let a = 1 + 2 | ||
12 | a + 3 | ||
13 | } | ||
10 | 14 | ||
11 | ; enum test { | 15 | ; enum test { |
12 | ; a = 1 | 16 | ; a = 1 |