aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2024-06-21 11:40:31 +0200
committerBad Diode <bd@badd10de.dev>2024-06-21 11:40:31 +0200
commit4646adb64119d9bd761447024a2f35cc0c9c2115 (patch)
tree116dfd0f2ad27933639188e6d29eec42305124d7 /src
parentc10fe9d48d40e3fa2a20ee61b79518dfbaeb4db9 (diff)
downloadbdl-4646adb64119d9bd761447024a2f35cc0c9c2115.tar.gz
bdl-4646adb64119d9bd761447024a2f35cc0c9c2115.zip
Add a basic symbol checker
Diffstat (limited to 'src')
-rw-r--r--src/badlib.h218
-rw-r--r--src/main.c284
-rw-r--r--src/parser.c7
3 files changed, 383 insertions, 126 deletions
diff --git a/src/badlib.h b/src/badlib.h
index ada309a..e6f0df6 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -818,117 +818,116 @@ queue_pop(Queue *l) {
818 818
819// Specialiced commonly used hash sets/maps as macro definitions. There is no 819// Specialiced commonly used hash sets/maps as macro definitions. There is no
820// delete on these ones but are trivial to implement! 820// delete on these ones but are trivial to implement!
821#define SETDEF(STRUCTNAME, FUNCNAME, KEYTYPE, HASHFUNC, EQFUNC) \ 821#define SETDEF(STRUCTNAME, FUNCNAME, KEYTYPE, HASHFUNC, EQFUNC) \
822 typedef struct STRUCTNAME { \ 822 typedef struct STRUCTNAME { \
823 struct STRUCTNAME *child[4]; \ 823 struct STRUCTNAME *child[4]; \
824 KEYTYPE key; \ 824 KEYTYPE key; \
825 } STRUCTNAME; \ 825 } STRUCTNAME; \
826 STRUCTNAME *\ 826 STRUCTNAME *FUNCNAME##_lookup(STRUCTNAME **m, KEYTYPE key) { \
827 FUNCNAME##_lookup(STRUCTNAME **m, KEYTYPE key) {\ 827 u64 h = HASHFUNC(key); \
828 u64 h = HASHFUNC(key);\ 828 while (*m) { \
829 while (*m) {\ 829 if (EQFUNC(key, (*m)->key)) { \
830 if (EQFUNC(key, (*m)->key)) {\ 830 return *m; \
831 return *m;\ 831 } \
832 }\ 832 h = (h << 2) | (h >> 62); \
833 h = (h << 2) | (h >> 62);\ 833 m = &(*m)->child[h & 0x3]; \
834 m = &(*m)->child[h & 0x3];\ 834 } \
835 }\ 835 return NULL; \
836 return NULL;\ 836 } \
837 }\ 837 STRUCTNAME *FUNCNAME##_insert(STRUCTNAME **m, KEYTYPE key, Arena *a) { \
838 STRUCTNAME *\ 838 u64 h = HASHFUNC(key); \
839 FUNCNAME##_insert(STRUCTNAME **m, KEYTYPE key, Arena *a) {\ 839 while (*m) { \
840 u64 h = HASHFUNC(key);\ 840 if (EQFUNC(key, (*m)->key)) { \
841 while (*m) {\ 841 return *m; \
842 if (EQFUNC(key, (*m)->key)) {\ 842 } \
843 return *m;\ 843 h = (h << 2) | (h >> 62); \
844 }\ 844 m = &(*m)->child[h & 0x3]; \
845 h = (h << 2) | (h >> 62);\ 845 } \
846 m = &(*m)->child[h & 0x3];\ 846 *m = arena_calloc(sizeof(STRUCTNAME), a); \
847 }\ 847 (*m)->key = key; \
848 *m = arena_calloc(sizeof(STRUCTNAME), a);\ 848 return *m; \
849 (*m)->key = key;\ 849 } \
850 return *m;\ 850 typedef Queue STRUCTNAME##Iter; \
851 }\ 851 STRUCTNAME##Iter FUNCNAME##_iterator(STRUCTNAME *map, Arena *a) { \
852 typedef Queue STRUCTNAME##Iter;\ 852 STRUCTNAME##Iter it = {0}; \
853 STRUCTNAME##Iter\ 853 queue_push(&it, map, a); \
854 FUNCNAME##_iterator(STRUCTNAME *map, Arena *a) {\ 854 return it; \
855 STRUCTNAME##Iter it = {0};\ 855 } \
856 queue_push(&it, map, a);\ 856 STRUCTNAME *FUNCNAME##_next(STRUCTNAME##Iter *it, Arena *a) { \
857 return it;\ 857 assert(it); \
858 }\ 858 assert(a); \
859 STRUCTNAME *\ 859 while (it->head) { \
860 FUNCNAME##_next(STRUCTNAME##Iter *it, Arena *a) {\ 860 STRUCTNAME *item = (STRUCTNAME *)queue_pop(it); \
861 assert(it);\ 861 if (!item) { \
862 assert(a);\ 862 return NULL; \
863 while (it->head) {\ 863 } \
864 STRUCTNAME *item = (STRUCTNAME *)queue_pop(it);\ 864 for (sz i = 0; i < 4; i++) { \
865 for (sz i = 0; i < 4; i++) {\ 865 STRUCTNAME *child = item->child[i]; \
866 STRUCTNAME *child = item->child[i];\ 866 if (child) { \
867 if (child) {\ 867 queue_push(it, child, a); \
868 queue_push(it, child, a);\ 868 } \
869 }\ 869 } \
870 }\ 870 return item; \
871 return item;\ 871 } \
872 }\ 872 return NULL; \
873 return NULL;\
874 } 873 }
875 874
876#define MAPDEF(STRUCTNAME, FUNCNAME, KEYTYPE, VALTYPE, HASHFUNC, EQFUNC) \ 875#define MAPDEF(STRUCTNAME, FUNCNAME, KEYTYPE, VALTYPE, HASHFUNC, EQFUNC) \
877 typedef struct STRUCTNAME { \ 876 typedef struct STRUCTNAME { \
878 struct STRUCTNAME *child[4]; \ 877 struct STRUCTNAME *child[4]; \
879 KEYTYPE key; \ 878 KEYTYPE key; \
880 VALTYPE val; \ 879 VALTYPE val; \
881 } STRUCTNAME;\ 880 } STRUCTNAME; \
882 STRUCTNAME *\ 881 STRUCTNAME *FUNCNAME##_insert(STRUCTNAME **m, KEYTYPE key, VALTYPE val, \
883 FUNCNAME##_insert(STRUCTNAME **m, KEYTYPE key, VALTYPE val, Arena *a) {\ 882 Arena *a) { \
884 u64 h = HASHFUNC(key);\ 883 u64 h = HASHFUNC(key); \
885 while (*m) {\ 884 while (*m) { \
886 if (EQFUNC(key, (*m)->key)) {\ 885 if (EQFUNC(key, (*m)->key)) { \
887 (*m)->val = val;\ 886 (*m)->val = val; \
888 return *m;\ 887 return *m; \
889 }\ 888 } \
890 h = (h << 2) | (h >> 62);\ 889 h = (h << 2) | (h >> 62); \
891 m = &(*m)->child[h & 0x3];\ 890 m = &(*m)->child[h & 0x3]; \
892 }\ 891 } \
893 *m = arena_calloc(sizeof(STRUCTNAME), a);\ 892 *m = arena_calloc(sizeof(STRUCTNAME), a); \
894 (*m)->key = key;\ 893 (*m)->key = key; \
895 (*m)->val = val;\ 894 (*m)->val = val; \
896 return *m;\ 895 return *m; \
897 }\ 896 } \
898 STRUCTNAME *\ 897 STRUCTNAME *FUNCNAME##_lookup(STRUCTNAME **m, KEYTYPE key) { \
899 FUNCNAME##_lookup(STRUCTNAME **m, KEYTYPE key) {\ 898 u64 h = HASHFUNC(key); \
900 u64 h = HASHFUNC(key);\ 899 while (*m) { \
901 while (*m) {\ 900 if (EQFUNC(key, (*m)->key)) { \
902 if (EQFUNC(key, (*m)->key)) {\ 901 return *m; \
903 return *m;\ 902 } \
904 }\ 903 h = (h << 2) | (h >> 62); \
905 h = (h << 2) | (h >> 62);\ 904 m = &(*m)->child[h & 0x3]; \
906 m = &(*m)->child[h & 0x3];\ 905 } \
907 }\ 906 return NULL; \
908 return NULL;\ 907 } \
909 }\ 908 typedef Queue STRUCTNAME##Iter; \
910 typedef Queue STRUCTNAME##Iter;\ 909 STRUCTNAME##Iter FUNCNAME##_iterator(STRUCTNAME *map, Arena *a) { \
911 STRUCTNAME##Iter\ 910 STRUCTNAME##Iter it = {0}; \
912 FUNCNAME##_iterator(STRUCTNAME *map, Arena *a) {\ 911 queue_push(&it, map, a); \
913 STRUCTNAME##Iter it = {0};\ 912 return it; \
914 queue_push(&it, map, a);\ 913 } \
915 return it;\ 914 STRUCTNAME *FUNCNAME##_next(STRUCTNAME##Iter *it, Arena *a) { \
916 }\ 915 assert(it); \
917 STRUCTNAME *\ 916 assert(a); \
918 FUNCNAME##_next(STRUCTNAME##Iter *it, Arena *a) {\ 917 while (it->head) { \
919 assert(it);\ 918 STRUCTNAME *item = (STRUCTNAME *)queue_pop(it); \
920 assert(a);\ 919 if (!item) { \
921 while (it->head) {\ 920 return NULL; \
922 STRUCTNAME *item = (STRUCTNAME *)queue_pop(it);\ 921 } \
923 for (sz i = 0; i < 4; i++) {\ 922 for (sz i = 0; i < 4; i++) { \
924 STRUCTNAME *child = item->child[i];\ 923 STRUCTNAME *child = item->child[i]; \
925 if (child) {\ 924 if (child) { \
926 queue_push(it, child, a);\ 925 queue_push(it, child, a); \
927 }\ 926 } \
928 }\ 927 } \
929 return item;\ 928 return item; \
930 }\ 929 } \
931 return NULL;\ 930 return NULL; \
932 } 931 }
933 932
934u64 933u64
@@ -951,7 +950,6 @@ _int_hash(sz a) {
951 return a * UINT64_C(11400714819323198485); 950 return a * UINT64_C(11400714819323198485);
952} 951}
953 952
954
955// Commonly used map/set types. 953// Commonly used map/set types.
956SETDEF(StrSet, strset, Str, str_hash, str_eq) 954SETDEF(StrSet, strset, Str, str_hash, str_eq)
957MAPDEF(StrIntMap, strintmap, Str, sz, str_hash, str_eq) 955MAPDEF(StrIntMap, strintmap, Str, sz, str_hash, str_eq)
diff --git a/src/main.c b/src/main.c
index 091e57f..e5f0f29 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,21 +22,248 @@ init(void) {
22 log_init_default(); 22 log_init_default();
23} 23}
24 24
25typedef enum {
26 SYM_VAR,
27 SYM_FUN,
28 SYM_PARAM,
29 SYM_BUILTIN,
30} SymbolKind;
31
32typedef struct Symbol {
33 SymbolKind kind;
34} Symbol;
35
36MAPDEF(SymbolMap, symmap, Str, Symbol, str_hash, str_eq)
37
38typedef struct Scope {
39 sz id;
40 sz depth;
41 SymbolMap *symbols;
42 struct Scope *parent;
43} Scope;
44
45typedef struct Analyzer {
46 Arena *storage;
47 Str file_name;
48 sz scope_gen;
49 Scope **scopes;
50} Analyzer;
51
52Scope *
53scope_alloc(Analyzer *a, Scope *parent) {
54 bool is_root = parent == NULL;
55 Scope *scope = arena_malloc(sizeof(Scope), a->storage);
56 scope->id = a->scope_gen++;
57 scope->parent = parent;
58 scope->depth = is_root ? 0 : parent->depth + 1;
59 array_push(a->scopes, scope, a->storage);
60 return scope;
61}
62
63SymbolMap *
64find_symbol(Scope *scope, Str symbol) {
65 while (scope != NULL) {
66 SymbolMap *val = symmap_lookup(&scope->symbols, symbol);
67 if (val != NULL) {
68 return val;
69 }
70 scope = scope->parent;
71 }
72 return NULL;
73}
74
25void 75void
26process_file(Str path) { 76analyzer_symbols(Analyzer *a, Node *node, Scope *scope) {
27 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); 77 assert(a);
78 assert(scope);
79 if (!node) {
80 return;
81 }
82 assert(node);
83 switch (node->kind) {
84 case NODE_FUN: {
85 if (scope->depth != 0) {
86 Str symbol = node->func_name->value.str;
87 if (symmap_lookup(&scope->symbols, symbol) != NULL) {
88 eprintln(
89 "%s:%d:%d: error: symbol '%s' already exists in "
90 "current "
91 "scope",
92 a->file_name, node->func_name->line,
93 node->func_name->col, symbol);
94 }
95 symmap_insert(&scope->symbols, symbol,
96 (Symbol){.kind = SYM_FUN}, a->storage);
97 }
98
99 scope = scope_alloc(a, scope);
100 for (sz i = 0; i < array_size(node->func_params); i++) {
101 Node *param = node->func_params[i];
102 symmap_insert(&scope->symbols, param->param_name->value.str,
103 (Symbol){.kind = SYM_PARAM}, a->storage);
104 }
105 analyzer_symbols(a, node->func_body, scope);
106 } break;
107 case NODE_COND:
108 case NODE_MATCH: {
109 if (node->match_expr) {
110 analyzer_symbols(a, node->match_expr, scope);
111 }
112 for (sz i = 0; i < array_size(node->match_cases); i++) {
113 Node *expr = node->match_cases[i];
114 analyzer_symbols(a, expr, scope);
115 }
116 } break;
117 case NODE_RETURN:
118 // TODO: Struct field initializers need to be checked... when we
119 // get to that.
120 // case NODE_VAL_FIELD:
121 // case NODE_ENUM:
122 // case NODE_STRUCT:
123 // case NODE_STRUCT_LIT:
124 case NODE_BLOCK: {
125 Scope *next = scope_alloc(a, scope);
126 for (sz i = 0; i < array_size(node->elements); i++) {
127 Node *expr = node->elements[i];
128 analyzer_symbols(a, expr, next);
129 }
130 } break;
131 case NODE_CASE: {
132 analyzer_symbols(a, node->case_value, scope);
133 if (node->case_expr) {
134 Scope *next = scope_alloc(a, scope);
135 analyzer_symbols(a, node->case_expr, next);
136 }
137 } break;
138 case NODE_IF: {
139 analyzer_symbols(a, node->cond_if, scope);
140 if (node->cond_expr) {
141 Scope *next = scope_alloc(a, scope);
142 analyzer_symbols(a, node->cond_expr, next);
143 }
144 if (node->cond_else) {
145 Scope *next = scope_alloc(a, scope);
146 analyzer_symbols(a, node->cond_else, next);
147 }
148 } break;
149 case NODE_WHILE: {
150 analyzer_symbols(a, node->while_cond, scope);
151 scope = scope_alloc(a, scope);
152 analyzer_symbols(a, node->while_expr, scope);
153 } break;
154 case NODE_FUNCALL: {
155 Str symbol = node->value.str;
156 if (find_symbol(scope, symbol) == NULL) {
157 eprintln(
158 "%s:%d:%d: error: symbol '%s' doesn't exists in current "
159 " scope ",
160 a->file_name, node->line, node->col, symbol);
161 }
162 Scope *next = scope_alloc(a, scope);
163 for (sz i = 0; i < array_size(node->elements); i++) {
164 Node *expr = node->elements[i];
165 analyzer_symbols(a, expr, next);
166 }
167 } break;
168 case NODE_SYMBOL_IDX:
169 case NODE_SYMBOL: {
170 Str symbol = node->value.str;
171 if (find_symbol(scope, symbol) == NULL) {
172 eprintln(
173 "%s:%d:%d: error: symbol '%s' doesn't exists in current "
174 " scope ",
175 a->file_name, node->line, node->col, symbol);
176 }
177 // TODO: Resolve symbol chains.
178 } break;
179 case NODE_SET: {
180 analyzer_symbols(a, node->var_name, scope);
181 analyzer_symbols(a, node->var_val, scope);
182 } break;
183 case NODE_LET: {
184 // Check the value first to avoid recursive symbol usage.
185 analyzer_symbols(a, node->var_val, scope);
186
187 Str symbol = node->var_name->value.str;
188 if (symmap_lookup(&scope->symbols, symbol) != NULL) {
189 eprintln(
190 "%s:%d:%d: error: symbol '%s' already exists in current "
191 " scope ",
192 a->file_name, node->var_name->line, node->var_name->col,
193 symbol);
194 }
195 symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_VAR},
196 a->storage);
197 } break;
198 // Binary ops
199 case NODE_ADD:
200 case NODE_SUB:
201 case NODE_DIV:
202 case NODE_MUL:
203 case NODE_MOD:
204 case NODE_NOT:
205 case NODE_AND:
206 case NODE_OR:
207 case NODE_EQ:
208 case NODE_NEQ:
209 case NODE_LT:
210 case NODE_GT:
211 case NODE_LE:
212 case NODE_GE:
213 case NODE_BITNOT:
214 case NODE_BITAND:
215 case NODE_BITOR:
216 case NODE_BITLSHIFT:
217 case NODE_BITRSHIFT: {
218 analyzer_symbols(a, node->left, scope);
219 analyzer_symbols(a, node->right, scope);
220 } break;
221 default: break;
222 }
223}
224
225void
226symbolic_analysis(Analyzer *a, Parser *parser) {
227 Scope *scope = scope_alloc(a, NULL);
228
229 // Fill builtin functions.
230 Str builtin_functions[] = {
231 cstr("print"),
232 cstr("println"),
233 };
234 for (sz i = 0; i < LEN(builtin_functions); i++) {
235 Str symbol = builtin_functions[i];
236 symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_BUILTIN},
237 a->storage);
238 }
239
240 // Find top level function declarations.
241 for (sz i = 0; i < array_size(parser->nodes); i++) {
242 Node *root = parser->nodes[i];
243 if (root->kind == NODE_FUN) {
244 Str symbol = root->func_name->value.str;
245 if (symmap_lookup(&scope->symbols, symbol) != NULL) {
246 eprintln(
247 "%s:%d:%d: error: symbol '%s' already exists in current "
248 "scope",
249 a->file_name, root->func_name->line, root->func_name->col,
250 symbol);
251 }
252 symmap_insert(&scope->symbols, symbol, (Symbol){.kind = SYM_FUN},
253 a->storage);
254 }
255 }
28 256
29 StrIntMap *map = NULL; 257 // Recursively fill symbol tables.
30 strintmap_insert(&map, cstr("test"), 1, &lexer_arena); 258 for (sz i = 0; i < array_size(parser->nodes); i++) {
31 strintmap_insert(&map, cstr("toast"), 9, &lexer_arena); 259 Node *root = parser->nodes[i];
32 strintmap_insert(&map, cstr("waaaa"), 420, &lexer_arena); 260 analyzer_symbols(a, root, scope);
33 strintmap_insert(&map, cstr("test"), 69, &lexer_arena);
34 StrIntMapIter iter = strintmap_iterator(map, &lexer_arena);
35 StrIntMap *val = strintmap_next(&iter, &lexer_arena);
36 while (val) {
37 println("KEY: %s value: %d", val->key, val->val);
38 val = strintmap_next(&iter, &lexer_arena);
39 } 261 }
262}
263
264void
265process_file(Str path) {
266 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
40 267
41 FileContents file = platform_read_file(path, &lexer_arena); 268 FileContents file = platform_read_file(path, &lexer_arena);
42 if (file.err) { 269 if (file.err) {
@@ -94,7 +321,34 @@ process_file(Str path) {
94 graph_ast(parser.nodes); 321 graph_ast(parser.nodes);
95 goto stop; 322 goto stop;
96 } 323 }
97 // TODO: Semantic analysis. 324
325 // Semantic analysis.
326 Analyzer analyzer = (Analyzer){
327 .storage = &lexer_arena,
328 .file_name = path,
329 };
330 symbolic_analysis(&analyzer, &parser);
331
332 // DEBUG: Printing all symbols on the HT.
333 // StrSetIter iter = strset_iterator(root_scope.symbols, &lexer_arena);
334 // StrSet *val = strset_next(&iter, &lexer_arena);
335 // while (val) {
336 // println("SYMBOL: %s", val->key);
337 // val = strset_next(&iter, &lexer_arena);
338 // }
339
340 // StrIntMap *map = NULL;
341 // strintmap_insert(&map, cstr("test"), 1, &lexer_arena);
342 // strintmap_insert(&map, cstr("toast"), 9, &lexer_arena);
343 // strintmap_insert(&map, cstr("waaaa"), 420, &lexer_arena);
344 // strintmap_insert(&map, cstr("test"), 69, &lexer_arena);
345 // StrIntMapIter iter = strintmap_iterator(map, &lexer_arena);
346 // StrIntMap *val = strintmap_next(&iter, &lexer_arena);
347 // while (val) {
348 // println("KEY: %s value: %d", val->key, val->val);
349 // val = strintmap_next(&iter, &lexer_arena);
350 // }
351
98 // TODO: Type checking. 352 // TODO: Type checking.
99 353
100 // Compile roots. 354 // Compile roots.
@@ -136,6 +390,10 @@ process_file(Str path) {
136 // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); 390 // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)});
137 // disassemble_chunk(chunk); 391 // disassemble_chunk(chunk);
138 392
393#if DEBUG == 1
394 println("Space used: %{Arena}", &lexer_arena);
395#endif
396
139 // arena_destroy(&bytecode_arena, os_allocator); 397 // arena_destroy(&bytecode_arena, os_allocator);
140stop: 398stop:
141 // Free up resources. 399 // Free up resources.
diff --git a/src/parser.c b/src/parser.c
index 8732a25..5928f7b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -697,6 +697,10 @@ parse_keyword(Parser *parser) {
697 node = node_alloc(parser, NODE_FUN, prev); 697 node = node_alloc(parser, NODE_FUN, prev);
698 if (!node) return; 698 if (!node) return;
699 parse_consume(parser, TOK_SYMBOL, cstr("expected function name")); 699 parse_consume(parser, TOK_SYMBOL, cstr("expected function name"));
700 Node *name = node_alloc(parser, NODE_SYMBOL, prev);
701 if (!name) return;
702 name->value.sym = parser->previous.val;
703 node->func_name = name;
700 parse_consume(parser, TOK_LPAREN, 704 parse_consume(parser, TOK_LPAREN,
701 cstr("expected '(' on function definition")); 705 cstr("expected '(' on function definition"));
702 // Parameters. 706 // Parameters.
@@ -968,9 +972,6 @@ parse_symbol(Parser *parser) {
968 parse_symbol(parser); 972 parse_symbol(parser);
969 node->next = array_pop(parser->nodes); 973 node->next = array_pop(parser->nodes);
970 } 974 }
971 } else {
972 node = node_alloc(parser, NODE_SYMBOL, prev);
973 if (!node) return;
974 } 975 }
975 node->value.sym = prev.val; 976 node->value.sym = prev.val;
976 array_push(parser->nodes, node, parser->storage); 977 array_push(parser->nodes, node, parser->storage);