diff options
Diffstat (limited to 'src/main.c')
-rw-r--r-- | src/main.c | 978 |
1 files changed, 1 insertions, 977 deletions
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include "badlib.h" | 5 | #include "badlib.h" |
6 | #include "lexer.c" | 6 | #include "lexer.c" |
7 | #include "parser.c" | ||
7 | 8 | ||
8 | typedef enum ExecMode { | 9 | typedef enum ExecMode { |
9 | RUN_NORMAL, | 10 | RUN_NORMAL, |
@@ -21,983 +22,6 @@ init(void) { | |||
21 | } | 22 | } |
22 | 23 | ||
23 | void | 24 | void |
24 | print_token(Token tok) { | ||
25 | println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); | ||
26 | } | ||
27 | |||
28 | void | ||
29 | print_tokens(Str path, Token *tokens) { | ||
30 | for (sz i = 0; i < array_size(tokens); i++) { | ||
31 | Token tok = tokens[i]; | ||
32 | print("%s:", path); | ||
33 | print_token(tok); | ||
34 | } | ||
35 | } | ||
36 | |||
37 | // | ||
38 | // Nodes | ||
39 | // | ||
40 | |||
41 | typedef enum NodeKind { | ||
42 | // Arithmetic. | ||
43 | NODE_ADD, | ||
44 | NODE_SUB, | ||
45 | NODE_DIV, | ||
46 | NODE_MUL, | ||
47 | NODE_MOD, | ||
48 | // Logical. | ||
49 | NODE_NOT, | ||
50 | NODE_AND, | ||
51 | NODE_OR, | ||
52 | NODE_EQ, | ||
53 | NODE_NEQ, | ||
54 | NODE_LT, | ||
55 | NODE_GT, | ||
56 | NODE_LE, | ||
57 | NODE_GE, | ||
58 | // Bitwise ops. | ||
59 | NODE_BITNOT, | ||
60 | NODE_BITAND, | ||
61 | NODE_BITOR, | ||
62 | NODE_BITLSHIFT, | ||
63 | NODE_BITRSHIFT, | ||
64 | // Literals. | ||
65 | NODE_STRING, | ||
66 | NODE_SYMBOL, | ||
67 | NODE_NUM_INT, | ||
68 | NODE_NUM_UINT, | ||
69 | NODE_NUM_FLOAT, | ||
70 | NODE_TRUE, | ||
71 | NODE_FALSE, | ||
72 | NODE_NIL, | ||
73 | NODE_STRUCT_LIT, | ||
74 | // Keywords. | ||
75 | NODE_LET, | ||
76 | NODE_SET, | ||
77 | NODE_STRUCT, | ||
78 | NODE_IF, | ||
79 | NODE_MATCH, | ||
80 | NODE_CASE, | ||
81 | NODE_COND, | ||
82 | NODE_ENUM, | ||
83 | NODE_BREAK, | ||
84 | NODE_CONTINUE, | ||
85 | NODE_WHILE, | ||
86 | // Helpers. | ||
87 | NODE_SYMBOL_IDX, | ||
88 | NODE_TYPE, | ||
89 | NODE_ARR_TYPE, | ||
90 | NODE_COMPOUND_TYPE, | ||
91 | NODE_VAL_FIELD, | ||
92 | NODE_BLOCK, | ||
93 | } NodeKind; | ||
94 | |||
95 | Str node_str[] = { | ||
96 | // Arithmetic. | ||
97 | [NODE_ADD] = cstr("ADD"), | ||
98 | [NODE_SUB] = cstr("SUB"), | ||
99 | [NODE_DIV] = cstr("DIV"), | ||
100 | [NODE_MUL] = cstr("MUL"), | ||
101 | [NODE_MOD] = cstr("MOD"), | ||
102 | // Logical. | ||
103 | [NODE_NOT] = cstr("NOT"), | ||
104 | [NODE_AND] = cstr("AND"), | ||
105 | [NODE_OR] = cstr("OR"), | ||
106 | [NODE_EQ] = cstr("EQ"), | ||
107 | [NODE_NEQ] = cstr("NEQ"), | ||
108 | [NODE_LT] = cstr("LT"), | ||
109 | [NODE_GT] = cstr("GT"), | ||
110 | [NODE_LE] = cstr("LE"), | ||
111 | [NODE_GE] = cstr("GE"), | ||
112 | // Bitwise ops. | ||
113 | [NODE_BITNOT] = cstr("BITNOT"), | ||
114 | [NODE_BITAND] = cstr("BITAND"), | ||
115 | [NODE_BITOR] = cstr("BITOR"), | ||
116 | [NODE_BITLSHIFT] = cstr("BITLSHIFT"), | ||
117 | [NODE_BITRSHIFT] = cstr("BITRSHIFT"), | ||
118 | // Literals. | ||
119 | [NODE_STRING] = cstr("STRING"), | ||
120 | [NODE_SYMBOL] = cstr("SYMBOL"), | ||
121 | [NODE_NUM_INT] = cstr("INT"), | ||
122 | [NODE_NUM_UINT] = cstr("UINT"), | ||
123 | [NODE_NUM_FLOAT] = cstr("FLOAT"), | ||
124 | [NODE_TRUE] = cstr("TRUE"), | ||
125 | [NODE_FALSE] = cstr("FALSE"), | ||
126 | [NODE_NIL] = cstr("NIL"), | ||
127 | [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), | ||
128 | // Keywords. | ||
129 | [NODE_LET] = cstr("LET"), | ||
130 | [NODE_SET] = cstr("SET"), | ||
131 | [NODE_STRUCT] = cstr("STRUCT DEF"), | ||
132 | [NODE_IF] = cstr("IF"), | ||
133 | [NODE_MATCH] = cstr("MATCH"), | ||
134 | [NODE_CASE] = cstr("CASE"), | ||
135 | [NODE_COND] = cstr("COND"), | ||
136 | [NODE_ENUM] = cstr("ENUM"), | ||
137 | [NODE_BREAK] = cstr("BREAK"), | ||
138 | [NODE_CONTINUE] = cstr("CONTINUE"), | ||
139 | [NODE_WHILE] = cstr("WHILE"), | ||
140 | // Helpers. | ||
141 | [NODE_TYPE] = cstr("TYPE"), | ||
142 | [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), | ||
143 | [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), | ||
144 | [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), | ||
145 | [NODE_VAL_FIELD] = cstr("FIELD"), | ||
146 | [NODE_BLOCK] = cstr("BLOCK"), | ||
147 | }; | ||
148 | |||
149 | typedef struct Node { | ||
150 | sz id; | ||
151 | sz line; | ||
152 | sz col; | ||
153 | |||
154 | NodeKind kind; | ||
155 | union { | ||
156 | f64 d; | ||
157 | sz i; | ||
158 | u64 u; | ||
159 | Str str; | ||
160 | Str sym; | ||
161 | } value; | ||
162 | union { | ||
163 | struct { | ||
164 | struct Node *left; | ||
165 | struct Node *right; | ||
166 | }; | ||
167 | struct { | ||
168 | struct Node *next; | ||
169 | struct Node *arr_size; | ||
170 | }; | ||
171 | struct { | ||
172 | struct Node *var_name; | ||
173 | struct Node *var_type; | ||
174 | struct Node *var_val; | ||
175 | }; | ||
176 | struct { | ||
177 | struct Node *while_cond; | ||
178 | struct Node *while_expr; | ||
179 | }; | ||
180 | struct { | ||
181 | struct Node *cond_if; | ||
182 | struct Node *cond_expr; | ||
183 | struct Node *cond_else; | ||
184 | }; | ||
185 | struct { | ||
186 | struct Node *field_name; | ||
187 | struct Node *field_type; | ||
188 | struct Node *field_val; | ||
189 | }; | ||
190 | struct { | ||
191 | struct Node *match_expr; | ||
192 | struct Node **match_cases; | ||
193 | }; | ||
194 | struct { | ||
195 | struct Node *case_value; | ||
196 | struct Node *case_expr; | ||
197 | }; | ||
198 | struct Node **struct_field; | ||
199 | struct Node **elements; | ||
200 | struct Node **statements; | ||
201 | }; | ||
202 | bool is_ptr; | ||
203 | } Node; | ||
204 | |||
205 | // | ||
206 | // Parsing. | ||
207 | // | ||
208 | |||
209 | typedef struct Parser { | ||
210 | Token current; | ||
211 | Token previous; | ||
212 | Token *tokens; | ||
213 | sz idx; | ||
214 | |||
215 | // Error handling. | ||
216 | bool err; | ||
217 | bool panic; | ||
218 | Str file_name; | ||
219 | |||
220 | // Storage. | ||
221 | Node **nodes; | ||
222 | Arena *storage; | ||
223 | } Parser; | ||
224 | |||
225 | typedef enum { | ||
226 | PREC_NONE = 0, | ||
227 | PREC_LOW, // lowest precedence | ||
228 | PREC_BITLOGIC, // & | | ||
229 | PREC_BITSHIFT, // << >> | ||
230 | PREC_OR, // || | ||
231 | PREC_AND, // && | ||
232 | PREC_EQUALITY, // == != | ||
233 | PREC_COMPARISON, // < > <= >= | ||
234 | PREC_TERM, // + - | ||
235 | PREC_FACTOR, // * / % | ||
236 | PREC_UNARY, // ! - | ||
237 | PREC_CALL, // . () | ||
238 | PREC_PRIMARY // highest precedence | ||
239 | } ParsePrecedence; | ||
240 | |||
241 | typedef void (*ParseFn)(Parser *); | ||
242 | |||
243 | typedef struct { | ||
244 | ParseFn prefix; | ||
245 | ParseFn infix; | ||
246 | ParsePrecedence precedence; | ||
247 | } ParseRule; | ||
248 | |||
249 | void parse_expr(Parser *parser, ParsePrecedence precedence); | ||
250 | void parse_advance(Parser *parser); | ||
251 | bool parse_match(Parser *parser, TokenKind kind); | ||
252 | |||
253 | void parse_grouping(Parser *parser); | ||
254 | void parse_unary(Parser *parser); | ||
255 | void parse_binary(Parser *parser); | ||
256 | void parse_number(Parser *parser); | ||
257 | void parse_literal(Parser *parser); | ||
258 | void parse_string(Parser *parser); | ||
259 | void parse_symbol(Parser *parser); | ||
260 | void parse_keyword(Parser *parser); | ||
261 | void parse_type(Parser *parser); | ||
262 | void parse_block(Parser *parser); | ||
263 | |||
264 | ParseRule parse_rules[] = { | ||
265 | [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, | ||
266 | [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, | ||
267 | [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, | ||
268 | |||
269 | // Arithmetic. | ||
270 | [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, | ||
271 | [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, | ||
272 | [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, | ||
273 | [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, | ||
274 | [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, | ||
275 | |||
276 | // Logical. | ||
277 | [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, | ||
278 | [TOK_AND] = {NULL, parse_binary, PREC_AND}, | ||
279 | [TOK_OR] = {NULL, parse_binary, PREC_OR}, | ||
280 | [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
281 | [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
282 | [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
283 | [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
284 | [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
285 | [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
286 | |||
287 | // Bitwise ops. | ||
288 | [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, | ||
289 | [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
290 | [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
291 | [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
292 | [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
293 | |||
294 | // Literals. | ||
295 | [TOK_STRING] = {parse_string, NULL, PREC_NONE}, | ||
296 | [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, | ||
297 | [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, | ||
298 | [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, | ||
299 | [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, | ||
300 | [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, | ||
301 | [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, | ||
302 | [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, | ||
303 | |||
304 | // Statements. | ||
305 | [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, | ||
306 | [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, | ||
307 | [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, | ||
308 | [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, | ||
309 | [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, | ||
310 | [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, | ||
311 | [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, | ||
312 | [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, | ||
313 | [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, | ||
314 | [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, | ||
315 | [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, | ||
316 | [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, | ||
317 | |||
318 | [TOK_EOF] = {NULL, NULL, PREC_NONE}, | ||
319 | }; | ||
320 | |||
321 | Node * | ||
322 | node_alloc(Parser *parser, NodeKind kind, Token tok) { | ||
323 | if (parser->panic) return NULL; | ||
324 | static sz id = 0; | ||
325 | Node *node = arena_calloc((sz)sizeof(Node), parser->storage); | ||
326 | node->id = id++; | ||
327 | node->kind = kind; | ||
328 | node->line = tok.line; | ||
329 | node->col = tok.col; | ||
330 | return node; | ||
331 | } | ||
332 | |||
333 | void | ||
334 | parse_emit_err(Parser *parser, Token token, Str msg) { | ||
335 | if (parser->panic) return; | ||
336 | parser->panic = true; | ||
337 | parser->err = true; | ||
338 | eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, | ||
339 | msg); | ||
340 | |||
341 | if (token.kind == TOK_EOF) { | ||
342 | eprintln(" at end of the file"); | ||
343 | } else if (token.kind != TOK_UNKNOWN) { | ||
344 | eprintln(" at %s", token.val); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | void | ||
349 | parse_advance(Parser *parser) { | ||
350 | assert(parser); | ||
351 | parser->previous = parser->current; | ||
352 | if (parser->previous.kind == TOK_EOF) { | ||
353 | return; | ||
354 | } | ||
355 | parser->current = parser->tokens[parser->idx++]; | ||
356 | } | ||
357 | |||
358 | bool | ||
359 | parse_match(Parser *parser, TokenKind kind) { | ||
360 | assert(parser); | ||
361 | if (parser->current.kind != kind) { | ||
362 | return false; | ||
363 | } | ||
364 | parse_advance(parser); | ||
365 | return true; | ||
366 | } | ||
367 | |||
368 | static void | ||
369 | parse_consume(Parser *parser, TokenKind kind, Str msg) { | ||
370 | if (parser->current.kind == kind) { | ||
371 | parse_advance(parser); | ||
372 | return; | ||
373 | } | ||
374 | parse_emit_err(parser, parser->current, msg); | ||
375 | } | ||
376 | |||
377 | void | ||
378 | parse_block(Parser *parser) { | ||
379 | #if DEBUG == 1 | ||
380 | print("parsing block "); | ||
381 | print_token(parser->previous); | ||
382 | #endif | ||
383 | Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); | ||
384 | if (!block) return; | ||
385 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
386 | parse_expr(parser, PREC_LOW); | ||
387 | Node *next = array_pop(parser->nodes); | ||
388 | array_push(block->statements, next, parser->storage); | ||
389 | } | ||
390 | array_push(parser->nodes, block, parser->storage); | ||
391 | } | ||
392 | |||
393 | void | ||
394 | parse_expr(Parser *parser, ParsePrecedence precedence) { | ||
395 | parse_advance(parser); | ||
396 | // TODO: synchronize panic mode on keywords. | ||
397 | if (parser->panic) return; | ||
398 | ParseFn prefix = parse_rules[parser->previous.kind].prefix; | ||
399 | if (prefix == NULL) { | ||
400 | parse_emit_err(parser, parser->previous, cstr("expected expression")); | ||
401 | return; | ||
402 | } | ||
403 | prefix(parser); | ||
404 | |||
405 | while (precedence <= parse_rules[parser->current.kind].precedence) { | ||
406 | parse_advance(parser); | ||
407 | ParseFn infix = parse_rules[parser->previous.kind].infix; | ||
408 | if (infix == NULL) { | ||
409 | parse_emit_err(parser, parser->previous, | ||
410 | cstr("expected expression")); | ||
411 | return; | ||
412 | } | ||
413 | infix(parser); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | void | ||
418 | parse_unary(Parser *parser) { | ||
419 | if (parser->panic) return; | ||
420 | Token prev = parser->previous; | ||
421 | #if DEBUG == 1 | ||
422 | print("parsing unary "); | ||
423 | print_token(prev); | ||
424 | #endif | ||
425 | parse_expr(parser, PREC_UNARY); | ||
426 | Node *node = NULL; | ||
427 | switch (prev.kind) { | ||
428 | case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; | ||
429 | case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; | ||
430 | default: break; // Unreachable. | ||
431 | } | ||
432 | if (!node) return; | ||
433 | node->left = array_pop(parser->nodes); | ||
434 | array_push(parser->nodes, node, parser->storage); | ||
435 | } | ||
436 | |||
437 | void | ||
438 | parse_literal(Parser *parser) { | ||
439 | if (parser->panic) return; | ||
440 | Token prev = parser->previous; | ||
441 | #if DEBUG == 1 | ||
442 | print("parsing literal "); | ||
443 | print_token(prev); | ||
444 | #endif | ||
445 | Node *node = NULL; | ||
446 | switch (prev.kind) { | ||
447 | case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; | ||
448 | case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; | ||
449 | case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; | ||
450 | default: return; // Unreachable. | ||
451 | } | ||
452 | if (!node) return; | ||
453 | array_push(parser->nodes, node, parser->storage); | ||
454 | } | ||
455 | |||
456 | void | ||
457 | parse_type(Parser *parser) { | ||
458 | Token prev = parser->previous; | ||
459 | #if DEBUG == 1 | ||
460 | print("parsing type "); | ||
461 | print_token(prev); | ||
462 | #endif | ||
463 | Node *node = node_alloc(parser, NODE_TYPE, prev); | ||
464 | if (!node) return; | ||
465 | if (parse_match(parser, TOK_AT)) { | ||
466 | node->is_ptr = true; | ||
467 | } | ||
468 | if (parse_match(parser, TOK_LCURLY)) { | ||
469 | node->kind = NODE_COMPOUND_TYPE; | ||
470 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
471 | parse_consume(parser, TOK_SYMBOL, cstr("expected name")); | ||
472 | parse_symbol(parser); | ||
473 | Node *next = array_pop(parser->nodes); | ||
474 | parse_consume(parser, TOK_COLON, cstr("incomplete type")); | ||
475 | parse_type(parser); | ||
476 | next->next = array_pop(parser->nodes); | ||
477 | array_push(node->elements, next, parser->storage); | ||
478 | } | ||
479 | } else { | ||
480 | parse_consume(parser, TOK_SYMBOL, | ||
481 | cstr("no type given for struct field")); | ||
482 | node->value.sym = parser->previous.val; | ||
483 | // Optional array value? | ||
484 | if (parse_match(parser, TOK_LSQUARE)) { | ||
485 | node->kind = NODE_ARR_TYPE, | ||
486 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); | ||
487 | parse_number(parser); | ||
488 | node->arr_size = array_pop(parser->nodes); | ||
489 | parse_consume(parser, TOK_RSQUARE, | ||
490 | cstr("unmatched brackets ']' in array type")); | ||
491 | } | ||
492 | } | ||
493 | array_push(parser->nodes, node, parser->storage); | ||
494 | } | ||
495 | |||
496 | void | ||
497 | parse_keyword(Parser *parser) { | ||
498 | if (parser->panic) return; | ||
499 | Token prev = parser->previous; | ||
500 | #if DEBUG == 1 | ||
501 | print("parsing keyword "); | ||
502 | print_token(prev); | ||
503 | #endif | ||
504 | Node *node = NULL; | ||
505 | switch (prev.kind) { | ||
506 | case TOK_LET: { | ||
507 | node = node_alloc(parser, NODE_LET, prev); | ||
508 | if (!node) return; | ||
509 | parse_consume(parser, TOK_SYMBOL, | ||
510 | cstr("expected symbol name on let expression")); | ||
511 | parse_symbol(parser); | ||
512 | node->var_name = array_pop(parser->nodes); | ||
513 | if (node->var_name->next) { | ||
514 | parse_emit_err(parser, prev, | ||
515 | cstr("invalid symbol name in let expression")); | ||
516 | return; | ||
517 | } | ||
518 | |||
519 | // Optional type declaration. | ||
520 | if (parse_match(parser, TOK_COLON)) { | ||
521 | parse_type(parser); | ||
522 | node->var_type = array_pop(parser->nodes); | ||
523 | } | ||
524 | |||
525 | // Optional assignment. | ||
526 | if (parse_match(parser, TOK_ASSIGN)) { | ||
527 | parse_expr(parser, PREC_LOW); | ||
528 | node->var_val = array_pop(parser->nodes); | ||
529 | } | ||
530 | } break; | ||
531 | case TOK_SET: { | ||
532 | node = node_alloc(parser, NODE_SET, prev); | ||
533 | if (!node) return; | ||
534 | parse_consume(parser, TOK_SYMBOL, | ||
535 | cstr("expected symbol name on let expression")); | ||
536 | parse_symbol(parser); | ||
537 | node->var_name = array_pop(parser->nodes); | ||
538 | parse_consume(parser, TOK_ASSIGN, | ||
539 | cstr("expected assignment on set expression")); | ||
540 | parse_expr(parser, PREC_LOW); | ||
541 | node->var_val = array_pop(parser->nodes); | ||
542 | } break; | ||
543 | case TOK_STRUCT: { | ||
544 | node = node_alloc(parser, NODE_STRUCT, prev); | ||
545 | if (!node) return; | ||
546 | parse_consume(parser, TOK_SYMBOL, | ||
547 | cstr("expected symbol name on struct definition")); | ||
548 | // Just consume this to avoid conflicts with struct literals. | ||
549 | node->value.sym = parser->previous.val; | ||
550 | |||
551 | parse_consume(parser, TOK_LCURLY, | ||
552 | cstr("expected '{' on struct definition")); | ||
553 | |||
554 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
555 | Node *field = | ||
556 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
557 | if (!field) return; | ||
558 | parse_consume( | ||
559 | parser, TOK_SYMBOL, | ||
560 | cstr("expected symbol name on struct definition")); | ||
561 | parse_symbol(parser); | ||
562 | field->field_name = array_pop(parser->nodes); | ||
563 | if (field->field_name->next || | ||
564 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
565 | parse_emit_err(parser, prev, | ||
566 | cstr("invalid symbol name in struct field")); | ||
567 | return; | ||
568 | } | ||
569 | parse_consume( | ||
570 | parser, TOK_COLON, | ||
571 | cstr("expected symbol name on struct definition")); | ||
572 | parse_type(parser); | ||
573 | field->field_type = array_pop(parser->nodes); | ||
574 | |||
575 | // Optional assignment. | ||
576 | if (parse_match(parser, TOK_ASSIGN)) { | ||
577 | parse_expr(parser, PREC_LOW); | ||
578 | field->field_val = array_pop(parser->nodes); | ||
579 | } | ||
580 | |||
581 | array_push(node->struct_field, field, parser->storage); | ||
582 | } | ||
583 | } break; | ||
584 | case TOK_IF: { | ||
585 | node = node_alloc(parser, NODE_IF, prev); | ||
586 | if (!node) return; | ||
587 | parse_consume(parser, TOK_LPAREN, | ||
588 | cstr("expected '(' on if expression")); | ||
589 | parse_expr(parser, PREC_LOW); | ||
590 | node->cond_if = array_pop(parser->nodes); | ||
591 | parse_consume(parser, TOK_RPAREN, | ||
592 | cstr("expected ')' on if expression")); | ||
593 | parse_expr(parser, PREC_LOW); | ||
594 | node->cond_expr = array_pop(parser->nodes); | ||
595 | if (parse_match(parser, TOK_ELSE)) { | ||
596 | parse_expr(parser, PREC_LOW); | ||
597 | node->cond_else = array_pop(parser->nodes); | ||
598 | } | ||
599 | } break; | ||
600 | case TOK_MATCH: { | ||
601 | node = node_alloc(parser, NODE_MATCH, prev); | ||
602 | if (!node) return; | ||
603 | parse_consume(parser, TOK_LPAREN, | ||
604 | cstr("expected '(' on if expression")); | ||
605 | parse_expr(parser, PREC_LOW); | ||
606 | node->match_expr = array_pop(parser->nodes); | ||
607 | parse_consume(parser, TOK_RPAREN, | ||
608 | cstr("expected ')' on if expression")); | ||
609 | parse_consume(parser, TOK_LCURLY, | ||
610 | cstr("expected block of match cases")); | ||
611 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
612 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
613 | if (!tmp) return; | ||
614 | // Are we on the default case. | ||
615 | if (!parse_match(parser, TOK_ELSE)) { | ||
616 | parse_consume(parser, TOK_CASE, | ||
617 | cstr("expected case statement")); | ||
618 | parse_expr(parser, PREC_LOW); | ||
619 | tmp->case_value = array_pop(parser->nodes); | ||
620 | } | ||
621 | parse_consume(parser, TOK_ASSIGN, | ||
622 | cstr("malformed case statement")); | ||
623 | parse_expr(parser, PREC_LOW); | ||
624 | tmp->case_expr = array_pop(parser->nodes); | ||
625 | array_push(node->match_cases, tmp, parser->storage); | ||
626 | } | ||
627 | // TODO: Check that we only have literals on the match case, this | ||
628 | // could be done on the analysis step, but also here... | ||
629 | // TODO: Check that there are no multiple default or duplicated | ||
630 | // cases. | ||
631 | } break; | ||
632 | case TOK_ENUM: { | ||
633 | node = node_alloc(parser, NODE_ENUM, prev); | ||
634 | if (!node) return; | ||
635 | parse_consume(parser, TOK_SYMBOL, | ||
636 | cstr("expected symbol name on enum definition")); | ||
637 | // Just consume this to avoid conflicts with struct literals. | ||
638 | node->value.sym = parser->previous.val; | ||
639 | parse_consume(parser, TOK_LCURLY, | ||
640 | cstr("expected '{' on enum definition")); | ||
641 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
642 | Node *field = | ||
643 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
644 | if (!field) return; | ||
645 | parse_consume(parser, TOK_SYMBOL, | ||
646 | cstr("expected symbol name on enum definition")); | ||
647 | parse_symbol(parser); | ||
648 | field->field_name = array_pop(parser->nodes); | ||
649 | if (field->field_name->next || | ||
650 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
651 | parse_emit_err(parser, prev, | ||
652 | cstr("invalid symbol name in enum field")); | ||
653 | return; | ||
654 | } | ||
655 | if (parse_match(parser, TOK_ASSIGN)) { | ||
656 | parse_expr(parser, PREC_LOW); | ||
657 | field->field_val = array_pop(parser->nodes); | ||
658 | } | ||
659 | array_push(node->struct_field, field, parser->storage); | ||
660 | } | ||
661 | } break; | ||
662 | case TOK_COND: { | ||
663 | node = node_alloc(parser, NODE_COND, prev); | ||
664 | if (!node) return; | ||
665 | parse_consume(parser, TOK_LCURLY, | ||
666 | cstr("expected '{' on cond expression")); | ||
667 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
668 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
669 | if (!tmp) return; | ||
670 | // Are we on the default case. | ||
671 | if (!parse_match(parser, TOK_ELSE)) { | ||
672 | parse_consume(parser, TOK_CASE, | ||
673 | cstr("expected case statement")); | ||
674 | parse_expr(parser, PREC_LOW); | ||
675 | tmp->case_value = array_pop(parser->nodes); | ||
676 | } | ||
677 | parse_consume(parser, TOK_ASSIGN, | ||
678 | cstr("malformed case statement")); | ||
679 | parse_expr(parser, PREC_LOW); | ||
680 | tmp->case_expr = array_pop(parser->nodes); | ||
681 | array_push(node->match_cases, tmp, parser->storage); | ||
682 | } | ||
683 | } break; | ||
684 | case TOK_BREAK: { | ||
685 | node = node_alloc(parser, NODE_BREAK, prev); | ||
686 | if (!node) return; | ||
687 | } break; | ||
688 | case TOK_CONTINUE: { | ||
689 | node = node_alloc(parser, NODE_CONTINUE, prev); | ||
690 | if (!node) return; | ||
691 | } break; | ||
692 | case TOK_WHILE: { | ||
693 | node = node_alloc(parser, NODE_WHILE, prev); | ||
694 | if (!node) return; | ||
695 | parse_consume(parser, TOK_LPAREN, | ||
696 | cstr("expected '(' on while expression")); | ||
697 | parse_expr(parser, PREC_LOW); | ||
698 | node->while_cond = array_pop(parser->nodes); | ||
699 | parse_consume(parser, TOK_RPAREN, | ||
700 | cstr("expected ')' on while expression")); | ||
701 | parse_expr(parser, PREC_LOW); | ||
702 | node->while_expr = array_pop(parser->nodes); | ||
703 | } break; | ||
704 | default: return; // Unreachable. | ||
705 | } | ||
706 | array_push(parser->nodes, node, parser->storage); | ||
707 | } | ||
708 | |||
709 | void | ||
710 | parse_binary(Parser *parser) { | ||
711 | if (parser->panic) return; | ||
712 | Token prev = parser->previous; | ||
713 | #if DEBUG == 1 | ||
714 | print("parsing binary "); | ||
715 | print_token(prev); | ||
716 | #endif | ||
717 | ParseRule rule = parse_rules[prev.kind]; | ||
718 | parse_expr(parser, rule.precedence + 1); | ||
719 | |||
720 | Node *node; | ||
721 | switch (prev.kind) { | ||
722 | case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; | ||
723 | case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; | ||
724 | case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; | ||
725 | case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; | ||
726 | case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; | ||
727 | case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; | ||
728 | case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; | ||
729 | case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; | ||
730 | case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; | ||
731 | case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; | ||
732 | case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; | ||
733 | case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; | ||
734 | case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; | ||
735 | case TOK_BITAND: { | ||
736 | node = node_alloc(parser, NODE_BITAND, prev); | ||
737 | } break; | ||
738 | case TOK_BITOR: { | ||
739 | node = node_alloc(parser, NODE_BITOR, prev); | ||
740 | } break; | ||
741 | case TOK_BITLSHIFT: { | ||
742 | node = node_alloc(parser, NODE_BITLSHIFT, prev); | ||
743 | } break; | ||
744 | case TOK_BITRSHIFT: { | ||
745 | node = node_alloc(parser, NODE_BITRSHIFT, prev); | ||
746 | } break; | ||
747 | default: { | ||
748 | parse_emit_err(parser, prev, cstr("unreachable")); | ||
749 | return; | ||
750 | } | ||
751 | } | ||
752 | if (!node) return; | ||
753 | node->right = array_pop(parser->nodes); | ||
754 | node->left = array_pop(parser->nodes); | ||
755 | array_push(parser->nodes, node, parser->storage); | ||
756 | } | ||
757 | |||
758 | void | ||
759 | parse_number(Parser *parser) { | ||
760 | if (parser->panic) return; | ||
761 | Token prev = parser->previous; | ||
762 | #if DEBUG == 1 | ||
763 | print("parsing number "); | ||
764 | print_token(prev); | ||
765 | #endif | ||
766 | Node *node = NULL; | ||
767 | switch (prev.kind) { | ||
768 | case TOK_NUM_INT: { | ||
769 | if (str_has_prefix(prev.val, cstr("0x")) || | ||
770 | str_has_prefix(prev.val, cstr("0b"))) { | ||
771 | node = node_alloc(parser, NODE_NUM_UINT, prev); | ||
772 | if (!node) return; | ||
773 | node->value.u = str_to_uint(prev.val); | ||
774 | } else { | ||
775 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
776 | if (!node) return; | ||
777 | node->value.i = str_to_int(prev.val); | ||
778 | } | ||
779 | } break; | ||
780 | case TOK_NUM_FLOAT: { | ||
781 | node = node_alloc(parser, NODE_NUM_FLOAT, prev); | ||
782 | if (!node) return; | ||
783 | node->value.d = str_to_float(prev.val); | ||
784 | } break; | ||
785 | case TOK_CHAR: { | ||
786 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
787 | if (!node) return; | ||
788 | node->value.i = prev.val.mem[1]; | ||
789 | } break; | ||
790 | default: break; | ||
791 | } | ||
792 | array_push(parser->nodes, node, parser->storage); | ||
793 | } | ||
794 | |||
795 | void | ||
796 | parse_string(Parser *parser) { | ||
797 | if (parser->panic) return; | ||
798 | Token prev = parser->previous; | ||
799 | #if DEBUG == 1 | ||
800 | print("parsing string "); | ||
801 | print_token(prev); | ||
802 | #endif | ||
803 | Node *node = node_alloc(parser, NODE_STRING, prev); | ||
804 | if (!node) return; | ||
805 | node->value.str = str_remove_prefix(prev.val, cstr("\"")); | ||
806 | node->value.str = str_remove_suffix(node->value.str, cstr("\"")); | ||
807 | array_push(parser->nodes, node, parser->storage); | ||
808 | } | ||
809 | |||
810 | void | ||
811 | parse_symbol(Parser *parser) { | ||
812 | if (parser->panic) return; | ||
813 | Token prev = parser->previous; | ||
814 | #if DEBUG == 1 | ||
815 | print("parsing symbol "); | ||
816 | print_token(prev); | ||
817 | #endif | ||
818 | if (prev.kind == TOK_AT) { | ||
819 | parse_consume(parser, TOK_SYMBOL, | ||
820 | cstr("expected symbol after '.' operator")); | ||
821 | parse_symbol(parser); | ||
822 | Node *node = array_pop(parser->nodes); | ||
823 | if (node) { | ||
824 | node->is_ptr = true; | ||
825 | array_push(parser->nodes, node, parser->storage); | ||
826 | } | ||
827 | return; | ||
828 | } | ||
829 | Node *node = node_alloc(parser, NODE_SYMBOL, prev); | ||
830 | if (!node) return; | ||
831 | if (parse_match(parser, TOK_DOT)) { | ||
832 | // Symbol chain. | ||
833 | parse_consume(parser, TOK_SYMBOL, | ||
834 | cstr("expected symbol after '.' operator")); | ||
835 | parse_symbol(parser); | ||
836 | node->next = array_pop(parser->nodes); | ||
837 | } else if (parse_match(parser, TOK_LCURLY)) { | ||
838 | // Struct literal. | ||
839 | node->kind = NODE_STRUCT_LIT; | ||
840 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
841 | Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
842 | parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); | ||
843 | parse_symbol(parser); | ||
844 | field->field_name = array_pop(parser->nodes); | ||
845 | parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); | ||
846 | parse_expr(parser, PREC_LOW); | ||
847 | field->field_type = array_pop(parser->nodes); | ||
848 | array_push(node->elements, field, parser->storage); | ||
849 | } | ||
850 | } else if (parse_match(parser, TOK_LSQUARE)) { | ||
851 | node->kind = NODE_SYMBOL_IDX; | ||
852 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); | ||
853 | parse_number(parser); | ||
854 | node->arr_size = array_pop(parser->nodes); | ||
855 | parse_consume(parser, TOK_RSQUARE, | ||
856 | cstr("unmatched brackets ']' in array type")); | ||
857 | if (parse_match(parser, TOK_DOT)) { | ||
858 | // Symbol chain. | ||
859 | parse_consume(parser, TOK_SYMBOL, | ||
860 | cstr("expected symbol after '.' operator")); | ||
861 | parse_symbol(parser); | ||
862 | node->next = array_pop(parser->nodes); | ||
863 | } | ||
864 | } else { | ||
865 | node = node_alloc(parser, NODE_SYMBOL, prev); | ||
866 | if (!node) return; | ||
867 | } | ||
868 | node->value.sym = prev.val; | ||
869 | array_push(parser->nodes, node, parser->storage); | ||
870 | } | ||
871 | |||
872 | void | ||
873 | parse_grouping(Parser *parser) { | ||
874 | if (parser->panic) return; | ||
875 | #if DEBUG == 1 | ||
876 | print("parsing group "); | ||
877 | print_token(parser->previous); | ||
878 | #endif | ||
879 | parse_expr(parser, PREC_LOW); | ||
880 | parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); | ||
881 | } | ||
882 | |||
883 | void | ||
884 | graph_node(Node *node) { | ||
885 | if (node == NULL) return; | ||
886 | print("%d [width=2.5,shape=Mrecord,label=\"", node->id); | ||
887 | print("{ <type> %s | { L: %d | C: %d } } ", node_str[node->kind], | ||
888 | node->line, node->col); | ||
889 | switch (node->kind) { | ||
890 | case NODE_NUM_INT: print("| Value: %d", node->value.i); break; | ||
891 | case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; | ||
892 | case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; | ||
893 | case NODE_STRING: print("| Value: %s", node->value.str); break; | ||
894 | case NODE_SYMBOL_IDX: | ||
895 | case NODE_STRUCT: | ||
896 | case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; | ||
897 | case NODE_SYMBOL: | ||
898 | case NODE_TYPE: { | ||
899 | if (node->is_ptr) { | ||
900 | print("| Name: @%s", node->value.sym); | ||
901 | } else { | ||
902 | print("| Name: %s", node->value.sym); | ||
903 | } | ||
904 | } break; | ||
905 | default: break; | ||
906 | } | ||
907 | println("\"];"); | ||
908 | |||
909 | switch (node->kind) { | ||
910 | case NODE_COND: | ||
911 | case NODE_MATCH: { | ||
912 | if (node->match_expr) { | ||
913 | println("%d:e->%d:w;", node->id, node->match_expr->id); | ||
914 | graph_node(node->match_expr); | ||
915 | } | ||
916 | for (sz i = 0; i < array_size(node->match_cases); i++) { | ||
917 | Node *next = node->match_cases[i]; | ||
918 | println("%d:e->%d:w;", node->id, next->id); | ||
919 | graph_node(next); | ||
920 | } | ||
921 | } break; | ||
922 | case NODE_BLOCK: | ||
923 | case NODE_STRUCT_LIT: | ||
924 | case NODE_COMPOUND_TYPE: { | ||
925 | for (sz i = 0; i < array_size(node->elements); i++) { | ||
926 | Node *next = node->elements[i]; | ||
927 | println("%d:e->%d:w;", node->id, next->id); | ||
928 | graph_node(next); | ||
929 | } | ||
930 | } break; | ||
931 | case NODE_ENUM: | ||
932 | case NODE_STRUCT: { | ||
933 | for (sz i = 0; i < array_size(node->struct_field); i++) { | ||
934 | Node *next = node->struct_field[i]; | ||
935 | println("%d:e->%d:w;", node->id, next->id); | ||
936 | graph_node(next); | ||
937 | } | ||
938 | } break; | ||
939 | case NODE_IF: { | ||
940 | if (node->cond_if) { | ||
941 | println("%d:e->%d:w;", node->id, node->cond_if->id); | ||
942 | graph_node(node->cond_if); | ||
943 | } | ||
944 | if (node->cond_expr) { | ||
945 | println("%d:e->%d:w;", node->id, node->cond_expr->id); | ||
946 | graph_node(node->cond_expr); | ||
947 | } | ||
948 | if (node->cond_else) { | ||
949 | println("%d:e->%d:w;", node->id, node->cond_else->id); | ||
950 | graph_node(node->cond_else); | ||
951 | } | ||
952 | } break; | ||
953 | case NODE_VAL_FIELD: | ||
954 | case NODE_SET: | ||
955 | case NODE_LET: { | ||
956 | if (node->var_name) { | ||
957 | println("%d:e->%d:w;", node->id, node->var_name->id); | ||
958 | graph_node(node->var_name); | ||
959 | } | ||
960 | if (node->var_type) { | ||
961 | println("%d:e->%d:w;", node->id, node->var_type->id); | ||
962 | graph_node(node->var_type); | ||
963 | } | ||
964 | if (node->var_val) { | ||
965 | println("%d:e->%d:w;", node->id, node->var_val->id); | ||
966 | graph_node(node->var_val); | ||
967 | } | ||
968 | } break; | ||
969 | default: { | ||
970 | if (node->left) { | ||
971 | println("%d:e->%d:w;", node->id, node->left->id); | ||
972 | graph_node(node->left); | ||
973 | } | ||
974 | if (node->right) { | ||
975 | println("%d:e->%d:w;", node->id, node->right->id); | ||
976 | graph_node(node->right); | ||
977 | } | ||
978 | } break; | ||
979 | } | ||
980 | } | ||
981 | |||
982 | void | ||
983 | graph_ast(Node **roots) { | ||
984 | if (roots == NULL) return; | ||
985 | println("digraph ast {"); | ||
986 | println("rankdir=LR;"); | ||
987 | println("ranksep=\"0.95 equally\";"); | ||
988 | println("nodesep=\"0.5 equally\";"); | ||
989 | println("overlap=scale;"); | ||
990 | println("bgcolor=\"transparent\";"); | ||
991 | for (sz i = 0; i < array_size(roots); ++i) { | ||
992 | println("subgraph %d {", i); | ||
993 | Node *root = roots[array_size(roots) - 1 - i]; | ||
994 | graph_node(root); | ||
995 | println("}"); | ||
996 | } | ||
997 | println("}"); | ||
998 | } | ||
999 | |||
1000 | void | ||
1001 | process_file(Str path) { | 25 | process_file(Str path) { |
1002 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 26 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); |
1003 | 27 | ||