aboutsummaryrefslogtreecommitdiffstats
path: root/src/semantic.c
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2022-04-18 16:27:21 -0300
committerBad Diode <bd@badd10de.dev>2022-04-18 16:27:21 -0300
commit3da041f2e17fdeb69bf345aadf89c5fcc1814260 (patch)
treec1979ffee13f45f757712a61304a3edba89a80f5 /src/semantic.c
parentdcd3192e50d7b4ea333ecf57a7e8b325af145547 (diff)
downloadbdl-3da041f2e17fdeb69bf345aadf89c5fcc1814260.tar.gz
bdl-3da041f2e17fdeb69bf345aadf89c5fcc1814260.zip
Move semantic analysis to separate file
Diffstat (limited to 'src/semantic.c')
-rw-r--r--src/semantic.c527
1 files changed, 527 insertions, 0 deletions
diff --git a/src/semantic.c b/src/semantic.c
new file mode 100644
index 0000000..06958b9
--- /dev/null
+++ b/src/semantic.c
@@ -0,0 +1,527 @@
1#include "hashtable.h"
2
3typedef struct Scope {
4 struct Scope *parent;
5 HashTable *symbols;
6 HashTable *types;
7} Scope;
8
9typedef struct ParseTree {
10 Root *roots;
11 Scope *global_scope;
12 Scope *current_scope;
13} ParseTree;
14
15typedef struct Type {
16 StringView name;
17 size_t size; // (bytes)
18} Type;
19
20typedef enum DefaultType {
21 TYPE_VOID,
22 TYPE_BOOL,
23 TYPE_STR,
24 TYPE_U8,
25 TYPE_U16,
26 TYPE_U32,
27 TYPE_U64,
28 TYPE_S8,
29 TYPE_S16,
30 TYPE_S32,
31 TYPE_S64,
32 TYPE_F32,
33 TYPE_F64,
34} DefaultType;
35
36static Type default_types[] = {
37 [TYPE_VOID] = {STRING("void"), 0},
38 [TYPE_BOOL] = {STRING("bool"), 1},
39 [TYPE_STR] = {STRING("str"), 16}, // size (8) + pointer to data (8).
40 [TYPE_U8] = {STRING("u8"), 1},
41 [TYPE_U16] = {STRING("u16"), 2},
42 [TYPE_U32] = {STRING("u32"), 4},
43 [TYPE_U64] = {STRING("u64"), 8},
44 [TYPE_S8] = {STRING("s8"), 1},
45 [TYPE_S16] = {STRING("s16"), 2},
46 [TYPE_S32] = {STRING("s32"), 4},
47 [TYPE_S64] = {STRING("s64"), 8},
48 [TYPE_F32] = {STRING("f32"), 4},
49 [TYPE_F64] = {STRING("f64"), 8},
50};
51
52typedef enum SymbolType {
53 SYMBOL_VAR,
54 SYMBOL_FUN,
55} SymbolType;
56
57typedef struct Symbol {
58 Node *name;
59 SymbolType type;
60
61 union {
62 struct {
63 Node *type;
64 } var;
65
66 struct {
67 Node **param_types;
68 Node *return_type;
69 } fun;
70 };
71} Symbol;
72
73
74Symbol *
75alloc_symval(Node *name, SymbolType type) {
76 Symbol *val = malloc(sizeof(Symbol));
77 val->name = name;
78 val->type = type;
79 return val;
80}
81
82u64 sym_hash(const struct HashTable *table, void *bytes) {
83 Node *symbol = bytes;
84 u64 hash = _xor_shift_hash(symbol->string.start, symbol->string.n);
85 hash = _fibonacci_hash(hash, table->shift_amount);
86 return hash;
87}
88
89bool sym_eq(void *a, void *b) {
90 Node *a_node = a;
91 Node *b_node = b;
92 assert(a_node->type == NODE_SYMBOL);
93 assert(b_node->type == NODE_SYMBOL);
94 return sv_equal(&a_node->string, &b_node->string);
95}
96
97u64 type_hash(const struct HashTable *table, void *bytes) {
98 StringView *type = bytes;
99 u64 hash = _xor_shift_hash(type->start, type->n);
100 hash = _fibonacci_hash(hash, table->shift_amount);
101 return hash;
102}
103
104bool type_eq(void *a, void *b) {
105 StringView *a_type = a;
106 StringView *b_type = b;
107 return sv_equal(a_type, b_type);
108}
109
110Scope *
111alloc_scope(Scope *parent) {
112 Scope *scope = malloc(sizeof(Scope));
113 scope->parent = parent;
114 scope->symbols = ht_init(sym_hash, sym_eq);
115 scope->types = ht_init(type_hash, type_eq);
116 return scope;
117}
118
119Type *
120find_type(Scope *scope, Node *type) {
121 // TODO: Normally default types will be used more often. Since we don't
122 // allow type shadowing, we should search first on the global scope.
123 while (scope != NULL) {
124 Type *ret = ht_lookup(scope->types, &type->string);
125 if (ret != NULL) {
126 return ret;
127 }
128 scope = scope->parent;
129 }
130 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TYPE, type->line, type->col);
131 return NULL;
132}
133
134bool
135insert_symbol(Scope *scope, Node *symbol, Symbol *val) {
136 // Check if symbol already exists.
137 HashTable *symbols = scope->symbols;
138 if (ht_lookup(symbols, symbol) != NULL) {
139 push_error(ERR_TYPE_PARSER, ERR_SYMBOL_REDEF, symbol->line, symbol->col);
140 return false;
141 }
142 ht_insert(symbols, symbol, val);
143 return true;
144}
145
146Type *
147coerce_numeric_types(Type *a, Type *b) {
148 // TODO: Decide what to do with mixed numeric types. What are the promotion
149 // rules, etc.
150 if (a == &default_types[TYPE_U8]) {
151 if (b == &default_types[TYPE_U16] ||
152 b == &default_types[TYPE_U32] ||
153 b == &default_types[TYPE_U64]) {
154 return b;
155 }
156 } else if (a == &default_types[TYPE_U16]) {
157 if (b == &default_types[TYPE_U32] ||
158 b == &default_types[TYPE_U64]) {
159 return b;
160 }
161 } else if (a == &default_types[TYPE_U32]) {
162 if (b == &default_types[TYPE_U64]) {
163 return b;
164 }
165 } else if (a == &default_types[TYPE_S8]) {
166 if (b == &default_types[TYPE_S16] ||
167 b == &default_types[TYPE_S32] ||
168 b == &default_types[TYPE_S64]) {
169 return b;
170 }
171 } else if (a == &default_types[TYPE_S16]) {
172 if (b == &default_types[TYPE_S32] ||
173 b == &default_types[TYPE_S64]) {
174 return b;
175 }
176 } else if (a == &default_types[TYPE_S32]) {
177 if (b == &default_types[TYPE_S64]) {
178 return b;
179 }
180 } else if (a == &default_types[TYPE_F32]) {
181 if (b == &default_types[TYPE_F64]) {
182 return b;
183 }
184 }
185 return a;
186}
187
188bool
189type_is_numeric(Type *t) {
190 if (t == &default_types[TYPE_U8] ||
191 t == &default_types[TYPE_U16] ||
192 t == &default_types[TYPE_U32] ||
193 t == &default_types[TYPE_U64] ||
194 t == &default_types[TYPE_S8] ||
195 t == &default_types[TYPE_S16] ||
196 t == &default_types[TYPE_S32] ||
197 t == &default_types[TYPE_S64] ||
198 t == &default_types[TYPE_F32] ||
199 t == &default_types[TYPE_F64]) {
200 return true;
201 }
202 return false;
203}
204
205Symbol *
206find_symbol(Scope *scope, Node *node) {
207 while (scope != NULL) {
208 Symbol *val = ht_lookup(scope->symbols, node);
209 if (val != NULL) {
210 return val;
211 }
212 scope = scope->parent;
213 }
214 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_SYMBOL, node->line, node->col);
215 return NULL;
216}
217
218bool
219resolve_type(Scope *scope, Node *node) {
220 if (node->expr_type != NULL) {
221 return true;
222 }
223 switch (node->type) {
224 case NODE_BUILTIN: {
225 for (size_t i = 0; i < array_size(node->builtin.args); ++i) {
226 Node *arg = node->builtin.args[i];
227 if (!resolve_type(scope, arg)) {
228 return false;
229 }
230 }
231 switch (node->builtin.type) {
232 // Numbers.
233 case TOKEN_ADD:
234 case TOKEN_SUB:
235 case TOKEN_MUL:
236 case TOKEN_DIV:
237 case TOKEN_MOD: {
238 Type *type = NULL;
239 for (size_t i = 0; i < array_size(node->builtin.args); ++i) {
240 Node *arg = node->builtin.args[i];
241
242 // Check that all arguments are numbers.
243 if (!type_is_numeric(arg->expr_type)) {
244 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_NUM,
245 arg->line, arg->col);
246 return false;
247 }
248
249 if (type == NULL) {
250 type = arg->expr_type;
251 } else if (type != arg->expr_type) {
252 type = coerce_numeric_types(type, arg->expr_type);
253 }
254 }
255 node->expr_type = type;
256 } break;
257 // Bools.
258 case TOKEN_NOT:
259 case TOKEN_AND:
260 case TOKEN_OR: {
261 // Check that all arguments are boolean.
262 for (size_t i = 0; i < array_size(node->builtin.args); ++i) {
263 Node *arg = node->builtin.args[i];
264 if (arg->expr_type != &default_types[TYPE_BOOL]) {
265 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_BOOL,
266 arg->line, arg->col);
267 return false;
268 }
269 }
270 node->expr_type = &default_types[TYPE_BOOL];
271 } break;
272 case TOKEN_EQ:
273 case TOKEN_LT:
274 case TOKEN_GT:
275 case TOKEN_LE:
276 case TOKEN_GE: {
277 // Check that all arguments are nums.
278 for (size_t i = 0; i < array_size(node->builtin.args); ++i) {
279 Node *arg = node->builtin.args[i];
280 if (!type_is_numeric(arg->expr_type)) {
281 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_NUM,
282 arg->line, arg->col);
283 return false;
284 }
285 }
286 node->expr_type = &default_types[TYPE_BOOL];
287 } break;
288 default: break;
289 }
290 } break;
291 case NODE_SYMBOL: {
292 Symbol *val = find_symbol(scope, node);
293 if (val == NULL) {
294 return false;
295 }
296
297 Type *type = NULL;
298 switch (val->type) {
299 case SYMBOL_VAR: {
300 type = find_type(scope, val->var.type);
301 } break;
302 case SYMBOL_FUN: {
303 type = find_type(scope, val->fun.return_type);
304 } break;
305 }
306 if (type == NULL) {
307 return false;
308 }
309 node->expr_type = type;
310 } break;
311 case NODE_FUN: {
312 // Fill up new scope with parameters
313 scope = alloc_scope(scope);
314
315 // Parameters.
316 for (size_t i = 0; i < array_size(node->fun.param_names); ++i) {
317 Node *param = node->fun.param_names[i];
318 Node *type = node->fun.param_types[i];
319 Symbol *var = alloc_symval(param, SYMBOL_VAR);
320 var->var.type = type;
321 if (!insert_symbol(scope, param, var)) {
322 return false;
323 }
324 }
325
326 // Body.
327 Node *body = node->fun.body;
328 if (body->type == NODE_BLOCK) {
329 for (size_t i = 0; i < array_size(body->block.expr); ++i) {
330 Node *expr = body->block.expr[i];
331 if (!resolve_type(scope, expr)) {
332 return false;
333 }
334 }
335 Node *last_expr = body->block.expr[array_size(body->block.expr) - 1];
336 node->expr_type = last_expr->expr_type;
337 } else {
338 if (!resolve_type(scope, body)) {
339 return false;
340 }
341 }
342
343 // Check that the type of body matches the return type.
344 StringView *type_body = &node->fun.body->expr_type->name;
345 StringView *return_type = &node->fun.return_type->string;
346 if (!sv_equal(type_body, return_type)) {
347 push_error(ERR_TYPE_PARSER, ERR_WRONG_RET_TYPE, node->line, node->col);
348 return false;
349 }
350 } break;
351 case NODE_BLOCK: {
352 scope = alloc_scope(scope);
353 for (size_t i = 0; i < array_size(node->block.expr); ++i) {
354 Node *expr = node->block.expr[i];
355 if (!resolve_type(scope, expr)) {
356 return false;
357 }
358 }
359 Node *last_expr = node->block.expr[array_size(node->block.expr) - 1];
360 node->expr_type = last_expr->expr_type;
361 } break;
362 case NODE_IF: {
363 if (!resolve_type(scope, node->ifexpr.cond)) {
364 return false;
365 }
366 if (!resolve_type(scope, node->ifexpr.expr_true)) {
367 return false;
368 }
369 Type *type_true = node->ifexpr.expr_true->expr_type;
370 node->expr_type = type_true;
371 if (node->ifexpr.expr_false != NULL) {
372 if (!resolve_type(scope, node->ifexpr.expr_false)) {
373 return false;
374 }
375 }
376
377 // Check ifexpr.cond is a bool.
378 Type *type_cond = node->ifexpr.cond->expr_type;
379 if (!sv_equal(&type_cond->name, &default_types[TYPE_BOOL].name)) {
380 push_error(ERR_TYPE_PARSER, ERR_WRONG_COND_TYPE,
381 node->line, node->col);
382 return false;
383 }
384
385 // Check if types of expr_true and expr_false match
386 if (node->ifexpr.expr_false != NULL) {
387 Type *type_false = node->ifexpr.expr_false->expr_type;
388 if (type_is_numeric(type_true) && type_is_numeric(type_false)) {
389 node->expr_type = coerce_numeric_types(type_true, type_false);
390 } else if (!sv_equal(&type_true->name, &type_false->name)) {
391 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_T_F,
392 node->line, node->col);
393 return false;
394 }
395 }
396 } break;
397 case NODE_SET: {
398 node->expr_type = &default_types[TYPE_VOID];
399 if (!resolve_type(scope, node->set.symbol)) {
400 return false;
401 }
402 if (!resolve_type(scope, node->set.value)) {
403 return false;
404 }
405 Node *symbol = node->set.symbol;
406 Node *value = node->set.value;
407 if (!sv_equal(&symbol->expr_type->name, &value->expr_type->name)) {
408 push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH,
409 node->line, node->col);
410 return false;
411 }
412 } break;
413 case NODE_DEF: {
414 // Prepare value for symbol table.
415 Symbol *var = alloc_symval(node->def.symbol, SYMBOL_VAR);
416 var->var.type = node->def.type;
417 if (!insert_symbol(scope, node->def.symbol, var)) {
418 return false;
419 }
420
421 Type *type = find_type(scope, node->def.type);
422 if (type == NULL) {
423 return false;
424 }
425 node->def.symbol->expr_type = type;
426
427 node->expr_type = &default_types[TYPE_VOID];
428 // TODO: type inference from right side when not annotated?
429 if (!resolve_type(scope, node->def.value)) {
430 return false;
431 }
432 Node *symbol = node->def.symbol;
433 Node *value = node->def.value;
434 if (!sv_equal(&symbol->expr_type->name, &value->expr_type->name)) {
435 push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH,
436 node->line, node->col);
437 return false;
438 }
439 } break;
440 case NODE_NUMBER: {
441 // TODO: Numbers are f64/s64 unless explicitely annotated. Annotated
442 // numbers must fit in the given range (e.g. no negative constants
443 // inside a u64, no numbers bigger than 255 in a u8, etc.).
444 if (node->number.fractional != 0) {
445 node->expr_type = &default_types[TYPE_F64];
446 } else {
447 node->expr_type = &default_types[TYPE_S64];
448 }
449 } break;
450 case NODE_BOOL: {
451 node->expr_type = &default_types[TYPE_BOOL];
452 } break;
453 case NODE_STRING: {
454 node->expr_type = &default_types[TYPE_STR];
455 } break;
456 case NODE_FUNCALL: {
457 Symbol *val = find_symbol(scope, node->funcall.name);
458 if (!resolve_type(scope, node->funcall.name)) {
459 return false;
460 }
461 if (val->type != SYMBOL_FUN) {
462 push_error(ERR_TYPE_PARSER, ERR_WRONG_TYPE_FUN,
463 node->funcall.name->line, node->funcall.name->col);
464 return false;
465 }
466 if (array_size(node->funcall.args) != array_size(val->fun.param_types)) {
467 push_error(ERR_TYPE_PARSER, ERR_BAD_ARGS, node->line, node->col);
468 return false;
469 }
470 node->expr_type = node->funcall.name->expr_type;
471 for (size_t i = 0; i < array_size(node->funcall.args); ++i) {
472 Node *arg = node->funcall.args[i];
473 if (!resolve_type(scope, arg)) {
474 return false;
475 }
476 Node *expected = val->fun.param_types[i];
477 if (!sv_equal(&arg->expr_type->name, &expected->string)) {
478 push_error(ERR_TYPE_PARSER, ERR_TYPE_MISMATCH,
479 arg->line, arg->col);
480 return false;
481 }
482 }
483 } break;
484 default: break;
485 }
486 return true;
487}
488
489ParseTree *
490semantic_analysis(Root *roots) {
491 ParseTree *parse_tree = malloc(sizeof(ParseTree));
492 parse_tree->roots = roots;
493 parse_tree->global_scope = alloc_scope(NULL);
494 parse_tree->current_scope = parse_tree->global_scope;
495
496 // Fill global scope with default types.
497 HashTable *types = parse_tree->global_scope->types;
498 for (size_t i = 0; i < sizeof(default_types)/sizeof(Type); ++i) {
499 Type *type = &default_types[i];
500 ht_insert(types, &type->name, type);
501 }
502
503 // Fill up global function symbols.
504 Scope *scope = parse_tree->global_scope;
505 for (size_t i = 0; i < array_size(parse_tree->roots); ++i) {
506 Node *root = parse_tree->roots[i];
507 if (root->type == NODE_FUN) {
508 Node *name = root->fun.name;
509 Symbol *fun = alloc_symval(root->fun.name, SYMBOL_FUN);
510 fun->fun.param_types = root->fun.param_types;
511 fun->fun.return_type = root->fun.return_type;
512 if (!insert_symbol(scope, name, fun)) {
513 return NULL;
514 }
515 }
516 }
517
518 for (size_t i = 0; i < array_size(parse_tree->roots); ++i) {
519 // Fill up symbol tables in proper scope and resolve type of expression
520 // for all elements.
521 if (!resolve_type(scope, parse_tree->roots[i])) {
522 return NULL;
523 }
524 }
525
526 return parse_tree;
527}