diff options
author | Bad Diode <bd@badd10de.dev> | 2022-04-06 22:02:55 -0300 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2022-04-06 22:02:55 -0300 |
commit | 8186228baa8a4e646e2f11c0dbbf6a023079f8eb (patch) | |
tree | 56387d7a0fc52b2b9691c6d01202b2fb6338b77a | |
parent | 725ba80c048069d15b2668ad0fa0e123819ec410 (diff) | |
download | bdl-8186228baa8a4e646e2f11c0dbbf6a023079f8eb.tar.gz bdl-8186228baa8a4e646e2f11c0dbbf6a023079f8eb.zip |
Add initial implementation of symbol checking
-rw-r--r-- | src/errors.c | 2 | ||||
-rw-r--r-- | src/errors.h | 2 | ||||
-rw-r--r-- | src/nodes.c | 2 | ||||
-rw-r--r-- | src/nodes.h | 4 | ||||
-rw-r--r-- | src/parser.c | 223 | ||||
-rw-r--r-- | src/parser.h | 9 |
6 files changed, 221 insertions, 21 deletions
diff --git a/src/errors.c b/src/errors.c index b93b462..2781bf5 100644 --- a/src/errors.c +++ b/src/errors.c | |||
@@ -15,6 +15,8 @@ static const char* error_msgs[] = { | |||
15 | [ERR_NOT_A_BOOL] = "error: expected a bool", | 15 | [ERR_NOT_A_BOOL] = "error: expected a bool", |
16 | [ERR_NOT_A_LPAREN] = "error: expected opening parentheses (lparen)", | 16 | [ERR_NOT_A_LPAREN] = "error: expected opening parentheses (lparen)", |
17 | [ERR_NOT_A_RPAREN] = "error: expected closing parentheses (rparen)", | 17 | [ERR_NOT_A_RPAREN] = "error: expected closing parentheses (rparen)", |
18 | [ERR_SYMBOL_REDEF] = "error: symbol redefinition", | ||
19 | [ERR_UNKNOWN_SYMBOL] = "error: unknown symbol", | ||
18 | }; | 20 | }; |
19 | 21 | ||
20 | static Error current_error = {.value = ERR_OK}; | 22 | static Error current_error = {.value = ERR_OK}; |
diff --git a/src/errors.h b/src/errors.h index 871711b..eb83e52 100644 --- a/src/errors.h +++ b/src/errors.h | |||
@@ -23,6 +23,8 @@ typedef enum ErrorValue { | |||
23 | ERR_NOT_A_BOOL, | 23 | ERR_NOT_A_BOOL, |
24 | ERR_NOT_A_LPAREN, | 24 | ERR_NOT_A_LPAREN, |
25 | ERR_NOT_A_RPAREN, | 25 | ERR_NOT_A_RPAREN, |
26 | ERR_SYMBOL_REDEF, | ||
27 | ERR_UNKNOWN_SYMBOL, | ||
26 | ERR_OK, | 28 | ERR_OK, |
27 | } ErrorValue; | 29 | } ErrorValue; |
28 | 30 | ||
diff --git a/src/nodes.c b/src/nodes.c index af3b772..a420189 100644 --- a/src/nodes.c +++ b/src/nodes.c | |||
@@ -6,6 +6,8 @@ alloc_node(NodeType type) { | |||
6 | // TODO: Free memory! | 6 | // TODO: Free memory! |
7 | Node *node = malloc(sizeof(Node)); | 7 | Node *node = malloc(sizeof(Node)); |
8 | node->type = type; | 8 | node->type = type; |
9 | node->line = 0; | ||
10 | node->col = 0; | ||
9 | return node; | 11 | return node; |
10 | } | 12 | } |
11 | 13 | ||
diff --git a/src/nodes.h b/src/nodes.h index 4fb48a1..cf0e749 100644 --- a/src/nodes.h +++ b/src/nodes.h | |||
@@ -17,6 +17,8 @@ typedef enum NodeType { | |||
17 | 17 | ||
18 | typedef struct Node { | 18 | typedef struct Node { |
19 | NodeType type; | 19 | NodeType type; |
20 | size_t line; | ||
21 | size_t col; | ||
20 | 22 | ||
21 | union { | 23 | union { |
22 | // Numbers. | 24 | // Numbers. |
@@ -26,7 +28,7 @@ typedef struct Node { | |||
26 | size_t fractional; | 28 | size_t fractional; |
27 | } number; | 29 | } number; |
28 | 30 | ||
29 | // String/symbol. | 31 | // String/symbol/type. |
30 | StringView string; | 32 | StringView string; |
31 | 33 | ||
32 | // Boolean. | 34 | // Boolean. |
diff --git a/src/parser.c b/src/parser.c index ba063f1..7cd22ae 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -111,6 +111,8 @@ parse_number(Parser *parser) { | |||
111 | node->number.negative = negative; | 111 | node->number.negative = negative; |
112 | node->number.integral = integral; | 112 | node->number.integral = integral; |
113 | node->number.fractional = fractional; | 113 | node->number.fractional = fractional; |
114 | node->line = tok.line; | ||
115 | node->col = tok.col; | ||
114 | return node; | 116 | return node; |
115 | } | 117 | } |
116 | 118 | ||
@@ -123,6 +125,8 @@ parse_string(Parser *parser) { | |||
123 | } | 125 | } |
124 | Node *node = alloc_node(NODE_STRING); | 126 | Node *node = alloc_node(NODE_STRING); |
125 | node->string = tok.value; | 127 | node->string = tok.value; |
128 | node->line = tok.line; | ||
129 | node->col = tok.col; | ||
126 | return node; | 130 | return node; |
127 | } | 131 | } |
128 | 132 | ||
@@ -135,6 +139,8 @@ parse_symbol(Parser *parser) { | |||
135 | } | 139 | } |
136 | Node *node = alloc_node(NODE_SYMBOL); | 140 | Node *node = alloc_node(NODE_SYMBOL); |
137 | node->string = tok.value; | 141 | node->string = tok.value; |
142 | node->line = tok.line; | ||
143 | node->col = tok.col; | ||
138 | return node; | 144 | return node; |
139 | } | 145 | } |
140 | 146 | ||
@@ -145,12 +151,14 @@ parse_type(Parser *parser) { | |||
145 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | 151 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); |
146 | return NULL; | 152 | return NULL; |
147 | } | 153 | } |
154 | Node *node = alloc_node(NODE_TYPE); | ||
155 | node->line = tok.line; | ||
156 | node->col = tok.col; | ||
148 | tok = next_token(parser); | 157 | tok = next_token(parser); |
149 | if (tok.type != TOKEN_SYMBOL) { | 158 | if (tok.type != TOKEN_SYMBOL) { |
150 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | 159 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); |
151 | return NULL; | 160 | return NULL; |
152 | } | 161 | } |
153 | Node *node = alloc_node(NODE_TYPE); | ||
154 | node->string = tok.value; | 162 | node->string = tok.value; |
155 | return node; | 163 | return node; |
156 | } | 164 | } |
@@ -164,14 +172,18 @@ parse_bool(Parser *parser) { | |||
164 | } | 172 | } |
165 | Node *node = alloc_node(NODE_BOOL); | 173 | Node *node = alloc_node(NODE_BOOL); |
166 | node->boolean = tok.type == TOKEN_TRUE; | 174 | node->boolean = tok.type == TOKEN_TRUE; |
175 | node->line = tok.line; | ||
176 | node->col = tok.col; | ||
167 | return node; | 177 | return node; |
168 | } | 178 | } |
169 | 179 | ||
170 | Node * | 180 | Node * |
171 | parse_builtin(Parser *parser) { | 181 | parse_builtin(Parser *parser) { |
172 | Token op = next_token(parser); | 182 | Token tok = next_token(parser); |
173 | Node *node = alloc_node(NODE_BUILTIN); | 183 | Node *node = alloc_node(NODE_BUILTIN); |
174 | node->builtin.type = op.type; | 184 | node->builtin.type = tok.type; |
185 | node->line = tok.line; | ||
186 | node->col = tok.col; | ||
175 | array_init(node->builtin.args, 0); | 187 | array_init(node->builtin.args, 0); |
176 | while (has_next(parser)) { | 188 | while (has_next(parser)) { |
177 | Token next = peek_token(parser); | 189 | Token next = peek_token(parser); |
@@ -185,13 +197,13 @@ parse_builtin(Parser *parser) { | |||
185 | } | 197 | } |
186 | array_push(node->builtin.args, arg); | 198 | array_push(node->builtin.args, arg); |
187 | } | 199 | } |
188 | push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, op.line, op.col); | 200 | push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); |
189 | return NULL; | 201 | return NULL; |
190 | } | 202 | } |
191 | 203 | ||
192 | Node * | 204 | Node * |
193 | parse_def(Parser *parser) { | 205 | parse_def(Parser *parser) { |
194 | next_token(parser); // Skip keyword. | 206 | Token tok = next_token(parser); |
195 | 207 | ||
196 | Node *symbol = parse_symbol(parser); | 208 | Node *symbol = parse_symbol(parser); |
197 | if (symbol == NULL) { | 209 | if (symbol == NULL) { |
@@ -218,12 +230,14 @@ parse_def(Parser *parser) { | |||
218 | node->def.symbol = symbol; | 230 | node->def.symbol = symbol; |
219 | node->def.value = value; | 231 | node->def.value = value; |
220 | node->def.type = type; | 232 | node->def.type = type; |
233 | node->line = tok.line; | ||
234 | node->col = tok.col; | ||
221 | return node; | 235 | return node; |
222 | } | 236 | } |
223 | 237 | ||
224 | Node * | 238 | Node * |
225 | parse_set(Parser *parser) { | 239 | parse_set(Parser *parser) { |
226 | next_token(parser); // Skip keyword. | 240 | Token tok = next_token(parser); |
227 | 241 | ||
228 | Node *symbol = parse_symbol(parser); | 242 | Node *symbol = parse_symbol(parser); |
229 | if (symbol == NULL) { | 243 | if (symbol == NULL) { |
@@ -242,12 +256,14 @@ parse_set(Parser *parser) { | |||
242 | Node *node = alloc_node(NODE_SET); | 256 | Node *node = alloc_node(NODE_SET); |
243 | node->set.symbol = symbol; | 257 | node->set.symbol = symbol; |
244 | node->set.value = value; | 258 | node->set.value = value; |
259 | node->line = tok.line; | ||
260 | node->col = tok.col; | ||
245 | return node; | 261 | return node; |
246 | } | 262 | } |
247 | 263 | ||
248 | Node * | 264 | Node * |
249 | parse_fun(Parser *parser) { | 265 | parse_fun(Parser *parser) { |
250 | next_token(parser); // Skip keyword. | 266 | Token tok = next_token(parser); |
251 | 267 | ||
252 | Node *name = parse_symbol(parser); | 268 | Node *name = parse_symbol(parser); |
253 | if (name == NULL) { | 269 | if (name == NULL) { |
@@ -258,6 +274,8 @@ parse_fun(Parser *parser) { | |||
258 | node->fun.name = name; | 274 | node->fun.name = name; |
259 | array_init(node->fun.param_names, 0); | 275 | array_init(node->fun.param_names, 0); |
260 | array_init(node->fun.param_types, 0); | 276 | array_init(node->fun.param_types, 0); |
277 | node->line = tok.line; | ||
278 | node->col = tok.col; | ||
261 | 279 | ||
262 | // Parse parameter list and return type. | 280 | // Parse parameter list and return type. |
263 | if (!consume_lparen(parser)) { | 281 | if (!consume_lparen(parser)) { |
@@ -300,12 +318,14 @@ parse_fun(Parser *parser) { | |||
300 | 318 | ||
301 | Node * | 319 | Node * |
302 | parse_if(Parser *parser) { | 320 | parse_if(Parser *parser) { |
303 | next_token(parser); // Skip keyword. | 321 | Token tok = next_token(parser); |
304 | 322 | ||
305 | Node *node = alloc_node(NODE_IF); | 323 | Node *node = alloc_node(NODE_IF); |
306 | node->ifexpr.cond = NULL; | 324 | node->ifexpr.cond = NULL; |
307 | node->ifexpr.expr_true = NULL; | 325 | node->ifexpr.expr_true = NULL; |
308 | node->ifexpr.expr_false = NULL; | 326 | node->ifexpr.expr_false = NULL; |
327 | node->line = tok.line; | ||
328 | node->col = tok.col; | ||
309 | 329 | ||
310 | Node *cond = parse_next(parser); | 330 | Node *cond = parse_next(parser); |
311 | if (cond == NULL) { | 331 | if (cond == NULL) { |
@@ -317,7 +337,7 @@ parse_if(Parser *parser) { | |||
317 | } | 337 | } |
318 | node->ifexpr.cond = cond; | 338 | node->ifexpr.cond = cond; |
319 | node->ifexpr.expr_true = expr_true; | 339 | node->ifexpr.expr_true = expr_true; |
320 | Token tok = peek_token(parser); | 340 | tok = peek_token(parser); |
321 | 341 | ||
322 | // Optional else statement. | 342 | // Optional else statement. |
323 | if (tok.type != TOKEN_RPAREN) { | 343 | if (tok.type != TOKEN_RPAREN) { |
@@ -370,10 +390,12 @@ parse_paren(Parser *parser) { | |||
370 | 390 | ||
371 | Node * | 391 | Node * |
372 | parse_block(Parser *parser) { | 392 | parse_block(Parser *parser) { |
373 | next_token(parser); // Skip paren. | 393 | Token tok = next_token(parser); |
374 | 394 | ||
375 | Node *node = alloc_node(NODE_BLOCK); | 395 | Node *node = alloc_node(NODE_BLOCK); |
376 | array_init(node->block.expr, 0); | 396 | array_init(node->block.expr, 0); |
397 | node->line = tok.line; | ||
398 | node->col = tok.col; | ||
377 | while (true) { | 399 | while (true) { |
378 | Token next = peek_token(parser); | 400 | Token next = peek_token(parser); |
379 | if (next.type == TOKEN_RCURLY) { | 401 | if (next.type == TOKEN_RCURLY) { |
@@ -408,13 +430,178 @@ parse_next(Parser *parser) { | |||
408 | } | 430 | } |
409 | } | 431 | } |
410 | 432 | ||
433 | uint64_t sym_hash(const struct HashTable *table, void *bytes) { | ||
434 | // TODO: implement | ||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | bool sym_eq(void *a, void *b) { | ||
439 | Node *a_node = a; | ||
440 | Node *b_node = b; | ||
441 | assert(a_node->type == NODE_SYMBOL); | ||
442 | assert(b_node->type == NODE_SYMBOL); | ||
443 | return sv_equal(&a_node->string, &b_node->string); | ||
444 | } | ||
445 | |||
446 | Scope * | ||
447 | alloc_scope(Scope *parent) { | ||
448 | Scope *scope = malloc(sizeof(Scope)); | ||
449 | scope->parent = parent; | ||
450 | scope->symbols = ht_init(sym_hash, sym_eq); | ||
451 | return scope; | ||
452 | } | ||
453 | |||
411 | ParseTree * | 454 | ParseTree * |
412 | alloc_parsetree(void) { | 455 | alloc_parsetree(void) { |
413 | ParseTree *parse_tree = malloc(sizeof(ParseTree)); | 456 | ParseTree *parse_tree = malloc(sizeof(ParseTree)); |
414 | array_init(parse_tree->roots, 0); | 457 | array_init(parse_tree->roots, 0); |
458 | parse_tree->global_scope = alloc_scope(NULL); | ||
459 | parse_tree->current_scope = parse_tree->global_scope; | ||
460 | // TODO: Fill global scope with default types/symbols. | ||
415 | return parse_tree; | 461 | return parse_tree; |
416 | } | 462 | } |
417 | 463 | ||
464 | bool | ||
465 | insert_symbol(Parser *parser, Node *symbol) { | ||
466 | HashTable *symbols = parser->parse_tree->current_scope->symbols; | ||
467 | if (ht_lookup(symbols, symbol) != NULL) { | ||
468 | push_error(ERR_TYPE_PARSER, ERR_SYMBOL_REDEF, symbol->line, symbol->col); | ||
469 | return false; | ||
470 | } | ||
471 | ht_insert(symbols, symbol, symbol); | ||
472 | return true; | ||
473 | } | ||
474 | |||
475 | bool | ||
476 | parse_roots(Parser *parser) { | ||
477 | while (has_next(parser)) { | ||
478 | Node *node = parse_next(parser); | ||
479 | if (node == NULL) { | ||
480 | return false; | ||
481 | } | ||
482 | array_push(parser->parse_tree->roots, node); | ||
483 | } | ||
484 | return true; | ||
485 | } | ||
486 | |||
487 | bool | ||
488 | find_symbol(Parser *parser, Node *node) { | ||
489 | Scope *scope = parser->parse_tree->current_scope; | ||
490 | while (scope != NULL) { | ||
491 | if (ht_lookup(scope->symbols, node) != NULL) { | ||
492 | return true; | ||
493 | } | ||
494 | scope = scope->parent; | ||
495 | } | ||
496 | push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_SYMBOL, node->line, node->col); | ||
497 | return false; | ||
498 | } | ||
499 | |||
500 | bool | ||
501 | symbol_check(Parser *parser, Node *node) { | ||
502 | switch (node->type) { | ||
503 | case NODE_BUILTIN: { | ||
504 | for (size_t i = 0; i < array_size(node->builtin.args); ++i) { | ||
505 | Node *arg = node->builtin.args[i]; | ||
506 | symbol_check(parser, arg); | ||
507 | } | ||
508 | } break; | ||
509 | case NODE_SYMBOL: { | ||
510 | if (!find_symbol(parser, node)) { | ||
511 | return false; | ||
512 | } | ||
513 | } break; | ||
514 | case NODE_SET: { | ||
515 | Node *symbol = node->set.symbol; | ||
516 | if (!find_symbol(parser, symbol)) { | ||
517 | return false; | ||
518 | } | ||
519 | if (!symbol_check(parser, node->set.value)) { | ||
520 | return false; | ||
521 | } | ||
522 | } break; | ||
523 | case NODE_FUN: { | ||
524 | // TODO: Store scope in Node block? | ||
525 | Scope *prev_scope = parser->parse_tree->current_scope; | ||
526 | parser->parse_tree->current_scope = alloc_scope(prev_scope); | ||
527 | |||
528 | // Parameters. | ||
529 | for (size_t i = 0; i < array_size(node->fun.param_names); ++i) { | ||
530 | Node *param = node->fun.param_names[i]; | ||
531 | if (!insert_symbol(parser, param)) { | ||
532 | return false; | ||
533 | } | ||
534 | } | ||
535 | |||
536 | // Body. | ||
537 | Node *body = node->fun.body; | ||
538 | if (body->type == NODE_BLOCK) { | ||
539 | for (size_t i = 0; i < array_size(body->block.expr); ++i) { | ||
540 | Node *expr = body->block.expr[i]; | ||
541 | symbol_check(parser, expr); | ||
542 | } | ||
543 | } else { | ||
544 | symbol_check(parser, body); | ||
545 | } | ||
546 | parser->parse_tree->current_scope = prev_scope; | ||
547 | } break; | ||
548 | case NODE_BLOCK: { | ||
549 | // TODO: Store scope in Node block? | ||
550 | Scope *prev_scope = parser->parse_tree->current_scope; | ||
551 | parser->parse_tree->current_scope = alloc_scope(prev_scope); | ||
552 | for (size_t i = 0; i < array_size(node->block.expr); ++i) { | ||
553 | Node *expr = node->block.expr[i]; | ||
554 | symbol_check(parser, expr); | ||
555 | } | ||
556 | parser->parse_tree->current_scope = prev_scope; | ||
557 | } break; | ||
558 | case NODE_IF: { | ||
559 | if (!symbol_check(parser, node->ifexpr.cond)) { | ||
560 | return false; | ||
561 | } | ||
562 | if (!symbol_check(parser, node->ifexpr.expr_true)) { | ||
563 | return false; | ||
564 | } | ||
565 | if (node->ifexpr.expr_false != NULL) { | ||
566 | if (!symbol_check(parser, node->ifexpr.expr_false)) { | ||
567 | return false; | ||
568 | } | ||
569 | } | ||
570 | } break; | ||
571 | case NODE_DEF: { | ||
572 | if (!symbol_check(parser, node->def.value)) { | ||
573 | return false; | ||
574 | } | ||
575 | if (!insert_symbol(parser, node->def.symbol)) { | ||
576 | return false; | ||
577 | } | ||
578 | } break; | ||
579 | default: break; | ||
580 | } | ||
581 | return true; | ||
582 | } | ||
583 | |||
584 | bool | ||
585 | semantic_analysis(Parser *parser) { | ||
586 | // Fill up global function symbols. | ||
587 | for (size_t i = 0; i < array_size(parser->parse_tree->roots); ++i) { | ||
588 | Node *root = parser->parse_tree->roots[i]; | ||
589 | if (root->type == NODE_FUN) { | ||
590 | Node *name = root->fun.name; | ||
591 | if (!insert_symbol(parser, name)) { | ||
592 | return false; | ||
593 | } | ||
594 | } | ||
595 | } | ||
596 | |||
597 | // Fill up symbol tables in proper scope and check existance. | ||
598 | for (size_t i = 0; i < array_size(parser->parse_tree->roots); ++i) { | ||
599 | symbol_check(parser, parser->parse_tree->roots[i]); | ||
600 | } | ||
601 | |||
602 | return true; | ||
603 | } | ||
604 | |||
418 | ParseTree * | 605 | ParseTree * |
419 | parse(Token *tokens) { | 606 | parse(Token *tokens) { |
420 | Parser parser = { | 607 | Parser parser = { |
@@ -428,16 +615,12 @@ parse(Token *tokens) { | |||
428 | print_tokens(parser.tokens); | 615 | print_tokens(parser.tokens); |
429 | printf("------------\n"); | 616 | printf("------------\n"); |
430 | 617 | ||
431 | while (has_next(&parser)) { | 618 | if (!parse_roots(&parser)) { |
432 | Node *node = parse_next(&parser); | 619 | return NULL; |
433 | if (node == NULL) { | 620 | } |
434 | return NULL; | 621 | if (!semantic_analysis(&parser)) { |
435 | } | 622 | return NULL; |
436 | // DEBUG:... | ||
437 | print_node(node); | ||
438 | printf("\n"); | ||
439 | |||
440 | array_push(parser.parse_tree->roots, node); | ||
441 | } | 623 | } |
624 | |||
442 | return parser.parse_tree; | 625 | return parser.parse_tree; |
443 | } | 626 | } |
diff --git a/src/parser.h b/src/parser.h index 4d871ad..3c2dc2b 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -3,9 +3,18 @@ | |||
3 | 3 | ||
4 | #include "lexer.h" | 4 | #include "lexer.h" |
5 | #include "nodes.h" | 5 | #include "nodes.h" |
6 | #include "hashtable.h" | ||
7 | |||
8 | typedef struct Scope { | ||
9 | struct Scope *parent; | ||
10 | HashTable *symbols; | ||
11 | // HashTable types; | ||
12 | } Scope; | ||
6 | 13 | ||
7 | typedef struct ParseTree { | 14 | typedef struct ParseTree { |
8 | Node **roots; | 15 | Node **roots; |
16 | Scope *global_scope; | ||
17 | Scope *current_scope; | ||
9 | } ParseTree; | 18 | } ParseTree; |
10 | 19 | ||
11 | typedef struct Parser { | 20 | typedef struct Parser { |