aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2022-04-08 18:49:40 -0300
committerBad Diode <bd@badd10de.dev>2022-04-08 18:49:40 -0300
commit233dd92768a54060df9096558aa58c1f598cce7d (patch)
tree70cad899ab9767e6cc069192a763e2c0354b9f3f /src
parent55ecfb3b7713172f76ddbff022fa4d6a80d0661a (diff)
downloadbdl-233dd92768a54060df9096558aa58c1f598cce7d.tar.gz
bdl-233dd92768a54060df9096558aa58c1f598cce7d.zip
Add rudimentary type checking
Diffstat (limited to 'src')
-rw-r--r--src/errors.c5
-rw-r--r--src/errors.h5
-rw-r--r--src/nodes.c2
-rw-r--r--src/nodes.h10
-rw-r--r--src/parser.c233
-rw-r--r--src/parser.h7
-rw-r--r--src/viz.c7
7 files changed, 212 insertions, 57 deletions
diff --git a/src/errors.c b/src/errors.c
index 2781bf5..b987b34 100644
--- a/src/errors.c
+++ b/src/errors.c
@@ -17,6 +17,11 @@ static const char* error_msgs[] = {
17 [ERR_NOT_A_RPAREN] = "error: expected closing parentheses (rparen)", 17 [ERR_NOT_A_RPAREN] = "error: expected closing parentheses (rparen)",
18 [ERR_SYMBOL_REDEF] = "error: symbol redefinition", 18 [ERR_SYMBOL_REDEF] = "error: symbol redefinition",
19 [ERR_UNKNOWN_SYMBOL] = "error: unknown symbol", 19 [ERR_UNKNOWN_SYMBOL] = "error: unknown symbol",
20 [ERR_TYPE_REDEF] = "error: type redefinition",
21 [ERR_UNKNOWN_TYPE] = "error: unknown type",
22 [ERR_WRONG_RET_TYPE] = "error: return type don't match type signature",
23 [ERR_WRONG_COND_TYPE] = "error: conditional expression is not boolean",
24 [ERR_WRONG_TYPE_T_F] = "error: unmatched types between true and false expression",
20}; 25};
21 26
22static Error current_error = {.value = ERR_OK}; 27static Error current_error = {.value = ERR_OK};
diff --git a/src/errors.h b/src/errors.h
index eb83e52..a84305b 100644
--- a/src/errors.h
+++ b/src/errors.h
@@ -25,6 +25,11 @@ typedef enum ErrorValue {
25 ERR_NOT_A_RPAREN, 25 ERR_NOT_A_RPAREN,
26 ERR_SYMBOL_REDEF, 26 ERR_SYMBOL_REDEF,
27 ERR_UNKNOWN_SYMBOL, 27 ERR_UNKNOWN_SYMBOL,
28 ERR_TYPE_REDEF,
29 ERR_UNKNOWN_TYPE,
30 ERR_WRONG_RET_TYPE,
31 ERR_WRONG_COND_TYPE,
32 ERR_WRONG_TYPE_T_F,
28 ERR_OK, 33 ERR_OK,
29} ErrorValue; 34} ErrorValue;
30 35
diff --git a/src/nodes.c b/src/nodes.c
index 35d123a..51cc9ef 100644
--- a/src/nodes.c
+++ b/src/nodes.c
@@ -12,7 +12,7 @@ alloc_node(NodeType type) {
12 node->line = 0; 12 node->line = 0;
13 node->col = 0; 13 node->col = 0;
14 node->scope = NULL; 14 node->scope = NULL;
15 node->type_class = TYPE_UNK; 15 node->expr_type = NULL;
16 return node; 16 return node;
17} 17}
18 18
diff --git a/src/nodes.h b/src/nodes.h
index be6f7df..52022cc 100644
--- a/src/nodes.h
+++ b/src/nodes.h
@@ -15,21 +15,13 @@ typedef enum NodeType {
15 NODE_IF, 15 NODE_IF,
16} NodeType; 16} NodeType;
17 17
18typedef enum TypeClass {
19 TYPE_UNK,
20 TYPE_NONE,
21 TYPE_NUM,
22 TYPE_BOOL,
23 TYPE_STRING,
24} TypeClass;
25
26typedef struct Node { 18typedef struct Node {
27 size_t id; 19 size_t id;
28 NodeType type; 20 NodeType type;
29 size_t line; 21 size_t line;
30 size_t col; 22 size_t col;
31 struct Scope *scope; 23 struct Scope *scope;
32 TypeClass type_class; 24 struct Type *expr_type;
33 25
34 union { 26 union {
35 // Numbers. 27 // Numbers.
diff --git a/src/parser.c b/src/parser.c
index 0594d2c..ddcee56 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,6 +1,38 @@
1#include "parser.h" 1#include "parser.h"
2#include "darray.h" 2#include "darray.h"
3 3
4typedef enum DefaultType {
5 TYPE_VOID,
6 TYPE_BOOL,
7 TYPE_STR,
8 TYPE_U8,
9 TYPE_U16,
10 TYPE_U32,
11 TYPE_U64,
12 TYPE_S8,
13 TYPE_S16,
14 TYPE_S32,
15 TYPE_S64,
16 TYPE_F32,
17 TYPE_F64,
18} DefaultType;
19
20static Type default_types[] = {
21 [TYPE_VOID] = {STRING("void"), 0},
22 [TYPE_BOOL] = {STRING("bool"), 1},
23 [TYPE_STR] = {STRING("str"), 16}, // size (8) + pointer to data (8).
24 [TYPE_U8] = {STRING("u8"), 1},
25 [TYPE_U16] = {STRING("u16"), 2},
26 [TYPE_U32] = {STRING("u32"), 4},
27 [TYPE_U64] = {STRING("u64"), 8},
28 [TYPE_S8] = {STRING("s8"), 1},
29 [TYPE_S16] = {STRING("s16"), 2},
30 [TYPE_S32] = {STRING("s32"), 4},
31 [TYPE_S64] = {STRING("s64"), 8},
32 [TYPE_F32] = {STRING("f32"), 4},
33 [TYPE_F64] = {STRING("f64"), 8},
34};
35
4Token 36Token
5next_token(Parser *parser) { 37next_token(Parser *parser) {
6 return parser->tokens[parser->current_token++]; 38 return parser->tokens[parser->current_token++];
@@ -445,11 +477,25 @@ bool sym_eq(void *a, void *b) {
445 return sv_equal(&a_node->string, &b_node->string); 477 return sv_equal(&a_node->string, &b_node->string);
446} 478}
447 479
480u64 type_hash(const struct HashTable *table, void *bytes) {
481 StringView *type = bytes;
482 u64 hash = _xor_shift_hash(type->start, type->n);
483 hash = _fibonacci_hash(hash, table->shift_amount);
484 return hash;
485}
486
487bool type_eq(void *a, void *b) {
488 StringView *a_type = a;
489 StringView *b_type = b;
490 return sv_equal(a_type, b_type);
491}
492
448Scope * 493Scope *
449alloc_scope(Scope *parent) { 494alloc_scope(Scope *parent) {
450 Scope *scope = malloc(sizeof(Scope)); 495 Scope *scope = malloc(sizeof(Scope));
451 scope->parent = parent; 496 scope->parent = parent;
452 scope->symbols = ht_init(sym_hash, sym_eq); 497 scope->symbols = ht_init(sym_hash, sym_eq);
498 scope->types = ht_init(type_hash, type_eq);
453 return scope; 499 return scope;
454} 500}
455 501
@@ -459,19 +505,14 @@ alloc_parsetree(void) {
459 array_init(parse_tree->roots, 0); 505 array_init(parse_tree->roots, 0);
460 parse_tree->global_scope = alloc_scope(NULL); 506 parse_tree->global_scope = alloc_scope(NULL);
461 parse_tree->current_scope = parse_tree->global_scope; 507 parse_tree->current_scope = parse_tree->global_scope;
462 // TODO: Fill global scope with default types/symbols.
463 return parse_tree;
464}
465 508
466bool 509 // Fill global scope with default types.
467insert_symbol(Parser *parser, Node *symbol) { 510 HashTable *types = parse_tree->global_scope->types;
468 HashTable *symbols = parser->parse_tree->current_scope->symbols; 511 for (size_t i = 0; i < sizeof(default_types)/sizeof(Type); ++i) {
469 if (ht_lookup(symbols, symbol) != NULL) { 512 Type *type = &default_types[i];
470 push_error(ERR_TYPE_PARSER, ERR_SYMBOL_REDEF, symbol->line, symbol->col); 513 ht_insert(types, &type->name, type);
471 return false;
472 } 514 }
473 ht_insert(symbols, symbol, symbol); 515 return parse_tree;
474 return true;
475} 516}
476 517
477bool 518bool
@@ -486,17 +527,67 @@ parse_roots(Parser *parser) {
486 return true; 527 return true;
487} 528}
488 529
489bool 530Type *
531find_type(Parser *parser, Node *type) {
532 // Normally default types will be used more often. Since we don't
533 // allow type shadowing, we search first on the global scope.
534 Scope *scope = parser->parse_tree->global_scope;
535 Type *ret = ht_lookup(scope->types, &type->string);
536 if (ret != NULL) {
537 return ret;
538 }
539 scope = parser->parse_tree->current_scope;
540 while (scope->parent != NULL) {
541 Type *ret = ht_lookup(scope->types, &type->string);
542 if (ret != NULL) {
543 return ret;
544 }
545 scope = scope->parent;
546 }
547 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TYPE, type->line, type->col);
548 return NULL;
549}
550
551// TODO: Review this function when needed
552// bool
553// insert_type(Parser *parser, Node *type, size_t size) {
554// HashTable *types = parser->parse_tree->current_scope->types;
555// if (ht_lookup(types, type) != NULL) {
556// push_error(ERR_TYPE_PARSER, ERR_TYPE_REDEF, type->line, type->col);
557// return false;
558// }
559// // TODO: alloc_type.
560// ht_insert(types, &type->string, type);
561// return true;
562// }
563
564Type *
490find_symbol(Parser *parser, Node *node) { 565find_symbol(Parser *parser, Node *node) {
491 Scope *scope = parser->parse_tree->current_scope; 566 Scope *scope = parser->parse_tree->current_scope;
492 while (scope != NULL) { 567 while (scope != NULL) {
493 if (ht_lookup(scope->symbols, node) != NULL) { 568 Type *type = ht_lookup(scope->symbols, node);
494 return true; 569 if (type != NULL) {
570 return type;
495 } 571 }
496 scope = scope->parent; 572 scope = scope->parent;
497 } 573 }
498 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_SYMBOL, node->line, node->col); 574 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_SYMBOL, node->line, node->col);
499 return false; 575 return NULL;
576}
577
578bool
579insert_symbol(Parser *parser, Node *symbol, Node *type) {
580 HashTable *symbols = parser->parse_tree->current_scope->symbols;
581 if (ht_lookup(symbols, symbol) != NULL) {
582 push_error(ERR_TYPE_PARSER, ERR_SYMBOL_REDEF, symbol->line, symbol->col);
583 return false;
584 }
585 Type *t = find_type(parser, type);
586 if (t == NULL) {
587 return NULL;
588 }
589 ht_insert(symbols, symbol, t);
590 return true;
500} 591}
501 592
502bool 593bool
@@ -511,13 +602,13 @@ symbol_check(Parser *parser, Node *node) {
511 } 602 }
512 } break; 603 } break;
513 case NODE_SYMBOL: { 604 case NODE_SYMBOL: {
514 if (!find_symbol(parser, node)) { 605 if (find_symbol(parser, node) == NULL) {
515 return false; 606 return false;
516 } 607 }
517 } break; 608 } break;
518 case NODE_SET: { 609 case NODE_SET: {
519 Node *symbol = node->set.symbol; 610 Node *symbol = node->set.symbol;
520 if (!find_symbol(parser, symbol)) { 611 if (find_symbol(parser, symbol) == NULL) {
521 return false; 612 return false;
522 } 613 }
523 if (!symbol_check(parser, node->set.value)) { 614 if (!symbol_check(parser, node->set.value)) {
@@ -532,7 +623,8 @@ symbol_check(Parser *parser, Node *node) {
532 // Parameters. 623 // Parameters.
533 for (size_t i = 0; i < array_size(node->fun.param_names); ++i) { 624 for (size_t i = 0; i < array_size(node->fun.param_names); ++i) {
534 Node *param = node->fun.param_names[i]; 625 Node *param = node->fun.param_names[i];
535 if (!insert_symbol(parser, param)) { 626 Node *type = node->fun.param_types[i];
627 if (!insert_symbol(parser, param, type)) {
536 return false; 628 return false;
537 } 629 }
538 } 630 }
@@ -583,7 +675,7 @@ symbol_check(Parser *parser, Node *node) {
583 if (!symbol_check(parser, node->def.value)) { 675 if (!symbol_check(parser, node->def.value)) {
584 return false; 676 return false;
585 } 677 }
586 if (!insert_symbol(parser, node->def.symbol)) { 678 if (!insert_symbol(parser, node->def.symbol, node->def.type)) {
587 return false; 679 return false;
588 } 680 }
589 } break; 681 } break;
@@ -593,7 +685,15 @@ symbol_check(Parser *parser, Node *node) {
593} 685}
594 686
595bool 687bool
596resolve_typeclass(Parser *parser, Node *node) { 688resolve_type(Parser *parser, Node *node) {
689 if (node->expr_type != NULL) {
690 return true;
691 }
692 Scope *prev_scope = NULL;
693 if (node->scope != NULL) {
694 prev_scope = parser->parse_tree->current_scope;
695 parser->parse_tree->current_scope = node->scope;
696 }
597 switch (node->type) { 697 switch (node->type) {
598 case NODE_BUILTIN: { 698 case NODE_BUILTIN: {
599 switch (node->builtin.type) { 699 switch (node->builtin.type) {
@@ -602,7 +702,10 @@ resolve_typeclass(Parser *parser, Node *node) {
602 case TOKEN_SUB: 702 case TOKEN_SUB:
603 case TOKEN_MUL: 703 case TOKEN_MUL:
604 case TOKEN_DIV: 704 case TOKEN_DIV:
605 case TOKEN_MOD: { node->type_class = TYPE_NUM; } break; 705 case TOKEN_MOD: {
706 // TODO: Properly resolve this
707 node->expr_type = &default_types[TYPE_U64];
708 } break;
606 // Bools. 709 // Bools.
607 case TOKEN_NOT: 710 case TOKEN_NOT:
608 case TOKEN_AND: 711 case TOKEN_AND:
@@ -611,49 +714,87 @@ resolve_typeclass(Parser *parser, Node *node) {
611 case TOKEN_LT: 714 case TOKEN_LT:
612 case TOKEN_GT: 715 case TOKEN_GT:
613 case TOKEN_LE: 716 case TOKEN_LE:
614 case TOKEN_GE: { node->type_class = TYPE_BOOL; } break; 717 case TOKEN_GE: {
718 node->expr_type = &default_types[TYPE_BOOL];
719 } break;
615 default: break; 720 default: break;
616 } 721 }
617 for (size_t i = 0; i < array_size(node->builtin.args); ++i) { 722 for (size_t i = 0; i < array_size(node->builtin.args); ++i) {
618 Node *arg = node->builtin.args[i]; 723 Node *arg = node->builtin.args[i];
619 resolve_typeclass(parser, arg); 724 resolve_type(parser, arg);
620 } 725 }
621 } break; 726 } break;
622 case NODE_SYMBOL: { 727 case NODE_SYMBOL: {
623 // TODO: Resolve symbol type? 728 node->expr_type = find_symbol(parser, node);
624 } break; 729 } break;
625 case NODE_FUN: { 730 case NODE_FUN: {
626 // TODO: Resolve `node->type_class` based on the return value. 731 resolve_type(parser, node->fun.body);
627 resolve_typeclass(parser, node->fun.body); 732 StringView *type_body = &node->fun.body->expr_type->name;
733 StringView *return_type = &node->fun.return_type->string;
734 // Check that the type of body matches the return type.
735 if (!sv_equal(type_body, return_type)) {
736 push_error(ERR_TYPE_PARSER, ERR_WRONG_RET_TYPE, node->line, node->col);
737 return false;
738 }
628 } break; 739 } break;
629 case NODE_BLOCK: { 740 case NODE_BLOCK: {
630 for (size_t i = 0; i < array_size(node->block.expr); ++i) { 741 for (size_t i = 0; i < array_size(node->block.expr); ++i) {
631 Node *expr = node->block.expr[i]; 742 Node *expr = node->block.expr[i];
632 resolve_typeclass(parser, expr); 743 resolve_type(parser, expr);
633 } 744 }
634 Node *last_expr = node->block.expr[array_size(node->block.expr) - 1]; 745 Node *last_expr = node->block.expr[array_size(node->block.expr) - 1];
635 node->type_class = last_expr->type_class; 746 node->expr_type = last_expr->expr_type;
636 } break; 747 } break;
637 case NODE_IF: { 748 case NODE_IF: {
638 node->type_class = TYPE_NONE; 749 resolve_type(parser, node->ifexpr.cond);
639 resolve_typeclass(parser, node->ifexpr.cond); 750 // Check ifexpr.cond is a bool.
640 resolve_typeclass(parser, node->ifexpr.expr_true); 751 if (!sv_equal(&node->ifexpr.cond->expr_type->name, &default_types[TYPE_BOOL].name)) {
752 push_error(ERR_TYPE_PARSER, ERR_WRONG_COND_TYPE, node->line, node->col);
753 return false;
754 }
755
756 resolve_type(parser, node->ifexpr.expr_true);
641 if (node->ifexpr.expr_false != NULL) { 757 if (node->ifexpr.expr_false != NULL) {
642 resolve_typeclass(parser, node->ifexpr.expr_false); 758 resolve_type(parser, node->ifexpr.expr_false);
759 // Check if types of expr_true and expr_false match
760 if (!sv_equal(&node->ifexpr.expr_true->expr_type->name, &node->ifexpr.expr_false->expr_type->name)) {
761 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_T_F, node->line, node->col);
762 return false;
763 }
643 } 764 }
765 node->expr_type = node->ifexpr.expr_true->expr_type;
644 } break; 766 } break;
645 case NODE_SET: { 767 case NODE_SET: {
646 node->type_class = TYPE_NONE; 768 node->expr_type = &default_types[TYPE_VOID];
647 resolve_typeclass(parser, node->set.value); 769 resolve_type(parser, node->set.value);
648 } break; 770 } break;
649 case NODE_DEF: { 771 case NODE_DEF: {
650 node->type_class = TYPE_NONE; 772 node->expr_type = &default_types[TYPE_VOID];
651 resolve_typeclass(parser, node->def.value); 773 resolve_type(parser, node->def.value);
652 } break; 774 } break;
653 case NODE_NUMBER: { node->type_class = TYPE_NUM; } break; 775 case NODE_NUMBER: {
654 case NODE_BOOL: { node->type_class = TYPE_BOOL; } break; 776 // TODO: Is this the best way of doing it? We probably need a more
655 case NODE_STRING: { node->type_class = TYPE_STRING; } break; 777 // sophisticated way of approaching this. For example:
656 case NODE_TYPE: { node->type_class = TYPE_NONE; } break; 778 // `(if (< 1 2) 1 -2)` will currently fail with this approach, since
779 // 1:u64 and -2:s64
780 if (node->number.fractional != 0) {
781 node->expr_type = &default_types[TYPE_F64];
782 } else if (node->number.negative) {
783 node->expr_type = &default_types[TYPE_S64];
784 } else {
785 node->expr_type = &default_types[TYPE_U64];
786 }
787 } break;
788 case NODE_BOOL: {
789 node->expr_type = &default_types[TYPE_BOOL];
790 } break;
791 case NODE_STRING: {
792 node->expr_type = &default_types[TYPE_STR];
793 } break;
794 default: break;
795 }
796 if (node->scope != NULL) {
797 parser->parse_tree->current_scope = prev_scope;
657 } 798 }
658 return true; 799 return true;
659} 800}
@@ -665,7 +806,10 @@ semantic_analysis(Parser *parser) {
665 Node *root = parser->parse_tree->roots[i]; 806 Node *root = parser->parse_tree->roots[i];
666 if (root->type == NODE_FUN) { 807 if (root->type == NODE_FUN) {
667 Node *name = root->fun.name; 808 Node *name = root->fun.name;
668 if (!insert_symbol(parser, name)) { 809 // TODO: make sure we store information in the symbol table that
810 // this is actually a function, not just a variable with
811 // return_type.
812 if (!insert_symbol(parser, name, root->fun.return_type)) {
669 return false; 813 return false;
670 } 814 }
671 } 815 }
@@ -674,13 +818,10 @@ semantic_analysis(Parser *parser) {
674 for (size_t i = 0; i < array_size(parser->parse_tree->roots); ++i) { 818 for (size_t i = 0; i < array_size(parser->parse_tree->roots); ++i) {
675 // Fill up symbol tables in proper scope and check existance. 819 // Fill up symbol tables in proper scope and check existance.
676 symbol_check(parser, parser->parse_tree->roots[i]); 820 symbol_check(parser, parser->parse_tree->roots[i]);
677 // Resolve TypeClass for all elements. 821 // Resolve type of expression for all elements.
678 resolve_typeclass(parser, parser->parse_tree->roots[i]); 822 resolve_type(parser, parser->parse_tree->roots[i]);
679 } 823 }
680 824
681 // TODO: Resolve concrete types.
682 // TODO: Type check.
683
684 return true; 825 return true;
685} 826}
686 827
diff --git a/src/parser.h b/src/parser.h
index 3c2dc2b..cc3ba92 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -5,10 +5,15 @@
5#include "nodes.h" 5#include "nodes.h"
6#include "hashtable.h" 6#include "hashtable.h"
7 7
8typedef struct Type {
9 StringView name;
10 size_t size; // (bytes)
11} Type;
12
8typedef struct Scope { 13typedef struct Scope {
9 struct Scope *parent; 14 struct Scope *parent;
10 HashTable *symbols; 15 HashTable *symbols;
11 // HashTable types; 16 HashTable *types;
12} Scope; 17} Scope;
13 18
14typedef struct ParseTree { 19typedef struct ParseTree {
diff --git a/src/viz.c b/src/viz.c
index d409472..8b5d4cf 100644
--- a/src/viz.c
+++ b/src/viz.c
@@ -19,6 +19,10 @@ viz_node(Node *node) {
19 } 19 }
20 printf("%zu [width=2.5,shape=Mrecord,label=\"", node->id); 20 printf("%zu [width=2.5,shape=Mrecord,label=\"", node->id);
21 printf("<top> %s -- [%4ld:%-4ld] ", node_str[node->type], node->line, node->col); 21 printf("<top> %s -- [%4ld:%-4ld] ", node_str[node->type], node->line, node->col);
22 if (node->expr_type != NULL) {
23 printf("| T: ");
24 sv_write(&node->expr_type->name);
25 }
22 switch (node->type) { 26 switch (node->type) {
23 case NODE_NUMBER: { 27 case NODE_NUMBER: {
24 printf("| Value: "); 28 printf("| Value: ");
@@ -136,6 +140,9 @@ viz_node(Node *node) {
136 140
137void 141void
138viz_ast(ParseTree *parse_tree) { 142viz_ast(ParseTree *parse_tree) {
143 if (parse_tree == NULL) {
144 return;
145 }
139 printf("digraph ast {\n"); 146 printf("digraph ast {\n");
140 printf("rankdir=LR;\n"); 147 printf("rankdir=LR;\n");
141 printf("ranksep=\"0.95 equally\";\n"); 148 printf("ranksep=\"0.95 equally\";\n");