aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2024-06-19 08:51:35 +0200
committerBad Diode <bd@badd10de.dev>2024-06-19 08:51:35 +0200
commit27e6bf016e87702d30ebb7c0716ff3e4f08a8823 (patch)
treeac11f10945250a74b35c11db7e7c4aa7d0b63d51
parenteed58fa9d2bccf9df9128e6eff08fcd08b6fa95e (diff)
downloadbdl-27e6bf016e87702d30ebb7c0716ff3e4f08a8823.tar.gz
bdl-27e6bf016e87702d30ebb7c0716ff3e4f08a8823.zip
Move parser to its own file
-rw-r--r--src/main.c978
-rw-r--r--src/parser.c1340
-rw-r--r--src/parser.h18
3 files changed, 912 insertions, 1424 deletions
diff --git a/src/main.c b/src/main.c
index d001d9c..66142bd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -4,6 +4,7 @@
4 4
5#include "badlib.h" 5#include "badlib.h"
6#include "lexer.c" 6#include "lexer.c"
7#include "parser.c"
7 8
8typedef enum ExecMode { 9typedef enum ExecMode {
9 RUN_NORMAL, 10 RUN_NORMAL,
@@ -21,983 +22,6 @@ init(void) {
21} 22}
22 23
23void 24void
24print_token(Token tok) {
25 println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val);
26}
27
28void
29print_tokens(Str path, Token *tokens) {
30 for (sz i = 0; i < array_size(tokens); i++) {
31 Token tok = tokens[i];
32 print("%s:", path);
33 print_token(tok);
34 }
35}
36
37//
38// Nodes
39//
40
41typedef enum NodeKind {
42 // Arithmetic.
43 NODE_ADD,
44 NODE_SUB,
45 NODE_DIV,
46 NODE_MUL,
47 NODE_MOD,
48 // Logical.
49 NODE_NOT,
50 NODE_AND,
51 NODE_OR,
52 NODE_EQ,
53 NODE_NEQ,
54 NODE_LT,
55 NODE_GT,
56 NODE_LE,
57 NODE_GE,
58 // Bitwise ops.
59 NODE_BITNOT,
60 NODE_BITAND,
61 NODE_BITOR,
62 NODE_BITLSHIFT,
63 NODE_BITRSHIFT,
64 // Literals.
65 NODE_STRING,
66 NODE_SYMBOL,
67 NODE_NUM_INT,
68 NODE_NUM_UINT,
69 NODE_NUM_FLOAT,
70 NODE_TRUE,
71 NODE_FALSE,
72 NODE_NIL,
73 NODE_STRUCT_LIT,
74 // Keywords.
75 NODE_LET,
76 NODE_SET,
77 NODE_STRUCT,
78 NODE_IF,
79 NODE_MATCH,
80 NODE_CASE,
81 NODE_COND,
82 NODE_ENUM,
83 NODE_BREAK,
84 NODE_CONTINUE,
85 NODE_WHILE,
86 // Helpers.
87 NODE_SYMBOL_IDX,
88 NODE_TYPE,
89 NODE_ARR_TYPE,
90 NODE_COMPOUND_TYPE,
91 NODE_VAL_FIELD,
92 NODE_BLOCK,
93} NodeKind;
94
95Str node_str[] = {
96 // Arithmetic.
97 [NODE_ADD] = cstr("ADD"),
98 [NODE_SUB] = cstr("SUB"),
99 [NODE_DIV] = cstr("DIV"),
100 [NODE_MUL] = cstr("MUL"),
101 [NODE_MOD] = cstr("MOD"),
102 // Logical.
103 [NODE_NOT] = cstr("NOT"),
104 [NODE_AND] = cstr("AND"),
105 [NODE_OR] = cstr("OR"),
106 [NODE_EQ] = cstr("EQ"),
107 [NODE_NEQ] = cstr("NEQ"),
108 [NODE_LT] = cstr("LT"),
109 [NODE_GT] = cstr("GT"),
110 [NODE_LE] = cstr("LE"),
111 [NODE_GE] = cstr("GE"),
112 // Bitwise ops.
113 [NODE_BITNOT] = cstr("BITNOT"),
114 [NODE_BITAND] = cstr("BITAND"),
115 [NODE_BITOR] = cstr("BITOR"),
116 [NODE_BITLSHIFT] = cstr("BITLSHIFT"),
117 [NODE_BITRSHIFT] = cstr("BITRSHIFT"),
118 // Literals.
119 [NODE_STRING] = cstr("STRING"),
120 [NODE_SYMBOL] = cstr("SYMBOL"),
121 [NODE_NUM_INT] = cstr("INT"),
122 [NODE_NUM_UINT] = cstr("UINT"),
123 [NODE_NUM_FLOAT] = cstr("FLOAT"),
124 [NODE_TRUE] = cstr("TRUE"),
125 [NODE_FALSE] = cstr("FALSE"),
126 [NODE_NIL] = cstr("NIL"),
127 [NODE_STRUCT_LIT] = cstr("STRUCT LIT"),
128 // Keywords.
129 [NODE_LET] = cstr("LET"),
130 [NODE_SET] = cstr("SET"),
131 [NODE_STRUCT] = cstr("STRUCT DEF"),
132 [NODE_IF] = cstr("IF"),
133 [NODE_MATCH] = cstr("MATCH"),
134 [NODE_CASE] = cstr("CASE"),
135 [NODE_COND] = cstr("COND"),
136 [NODE_ENUM] = cstr("ENUM"),
137 [NODE_BREAK] = cstr("BREAK"),
138 [NODE_CONTINUE] = cstr("CONTINUE"),
139 [NODE_WHILE] = cstr("WHILE"),
140 // Helpers.
141 [NODE_TYPE] = cstr("TYPE"),
142 [NODE_ARR_TYPE] = cstr("TYPE (ARR)"),
143 [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"),
144 [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"),
145 [NODE_VAL_FIELD] = cstr("FIELD"),
146 [NODE_BLOCK] = cstr("BLOCK"),
147};
148
149typedef struct Node {
150 sz id;
151 sz line;
152 sz col;
153
154 NodeKind kind;
155 union {
156 f64 d;
157 sz i;
158 u64 u;
159 Str str;
160 Str sym;
161 } value;
162 union {
163 struct {
164 struct Node *left;
165 struct Node *right;
166 };
167 struct {
168 struct Node *next;
169 struct Node *arr_size;
170 };
171 struct {
172 struct Node *var_name;
173 struct Node *var_type;
174 struct Node *var_val;
175 };
176 struct {
177 struct Node *while_cond;
178 struct Node *while_expr;
179 };
180 struct {
181 struct Node *cond_if;
182 struct Node *cond_expr;
183 struct Node *cond_else;
184 };
185 struct {
186 struct Node *field_name;
187 struct Node *field_type;
188 struct Node *field_val;
189 };
190 struct {
191 struct Node *match_expr;
192 struct Node **match_cases;
193 };
194 struct {
195 struct Node *case_value;
196 struct Node *case_expr;
197 };
198 struct Node **struct_field;
199 struct Node **elements;
200 struct Node **statements;
201 };
202 bool is_ptr;
203} Node;
204
205//
206// Parsing.
207//
208
209typedef struct Parser {
210 Token current;
211 Token previous;
212 Token *tokens;
213 sz idx;
214
215 // Error handling.
216 bool err;
217 bool panic;
218 Str file_name;
219
220 // Storage.
221 Node **nodes;
222 Arena *storage;
223} Parser;
224
225typedef enum {
226 PREC_NONE = 0,
227 PREC_LOW, // lowest precedence
228 PREC_BITLOGIC, // & |
229 PREC_BITSHIFT, // << >>
230 PREC_OR, // ||
231 PREC_AND, // &&
232 PREC_EQUALITY, // == !=
233 PREC_COMPARISON, // < > <= >=
234 PREC_TERM, // + -
235 PREC_FACTOR, // * / %
236 PREC_UNARY, // ! -
237 PREC_CALL, // . ()
238 PREC_PRIMARY // highest precedence
239} ParsePrecedence;
240
241typedef void (*ParseFn)(Parser *);
242
243typedef struct {
244 ParseFn prefix;
245 ParseFn infix;
246 ParsePrecedence precedence;
247} ParseRule;
248
249void parse_expr(Parser *parser, ParsePrecedence precedence);
250void parse_advance(Parser *parser);
251bool parse_match(Parser *parser, TokenKind kind);
252
253void parse_grouping(Parser *parser);
254void parse_unary(Parser *parser);
255void parse_binary(Parser *parser);
256void parse_number(Parser *parser);
257void parse_literal(Parser *parser);
258void parse_string(Parser *parser);
259void parse_symbol(Parser *parser);
260void parse_keyword(Parser *parser);
261void parse_type(Parser *parser);
262void parse_block(Parser *parser);
263
264ParseRule parse_rules[] = {
265 [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE},
266 [TOK_LCURLY] = {parse_block, NULL, PREC_NONE},
267 [TOK_AT] = {parse_symbol, NULL, PREC_NONE},
268
269 // Arithmetic.
270 [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM},
271 [TOK_ADD] = {NULL, parse_binary, PREC_TERM},
272 [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR},
273 [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR},
274 [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR},
275
276 // Logical.
277 [TOK_NOT] = {parse_unary, NULL, PREC_NONE},
278 [TOK_AND] = {NULL, parse_binary, PREC_AND},
279 [TOK_OR] = {NULL, parse_binary, PREC_OR},
280 [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY},
281 [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY},
282 [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON},
283 [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON},
284 [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON},
285 [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON},
286
287 // Bitwise ops.
288 [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE},
289 [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC},
290 [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC},
291 [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT},
292 [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT},
293
294 // Literals.
295 [TOK_STRING] = {parse_string, NULL, PREC_NONE},
296 [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE},
297 [TOK_CHAR] = {parse_number, NULL, PREC_NONE},
298 [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE},
299 [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE},
300 [TOK_TRUE] = {parse_literal, NULL, PREC_NONE},
301 [TOK_FALSE] = {parse_literal, NULL, PREC_NONE},
302 [TOK_NIL] = {parse_literal, NULL, PREC_NONE},
303
304 // Statements.
305 [TOK_LET] = {parse_keyword, NULL, PREC_NONE},
306 [TOK_SET] = {parse_keyword, NULL, PREC_NONE},
307 [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE},
308 [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE},
309 [TOK_IF] = {parse_keyword, NULL, PREC_NONE},
310 [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE},
311 [TOK_COND] = {parse_keyword, NULL, PREC_NONE},
312 [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE},
313 [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE},
314 [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE},
315 [TOK_FUN] = {parse_keyword, NULL, PREC_NONE},
316 [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE},
317
318 [TOK_EOF] = {NULL, NULL, PREC_NONE},
319};
320
321Node *
322node_alloc(Parser *parser, NodeKind kind, Token tok) {
323 if (parser->panic) return NULL;
324 static sz id = 0;
325 Node *node = arena_calloc((sz)sizeof(Node), parser->storage);
326 node->id = id++;
327 node->kind = kind;
328 node->line = tok.line;
329 node->col = tok.col;
330 return node;
331}
332
333void
334parse_emit_err(Parser *parser, Token token, Str msg) {
335 if (parser->panic) return;
336 parser->panic = true;
337 parser->err = true;
338 eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col,
339 msg);
340
341 if (token.kind == TOK_EOF) {
342 eprintln(" at end of the file");
343 } else if (token.kind != TOK_UNKNOWN) {
344 eprintln(" at %s", token.val);
345 }
346}
347
348void
349parse_advance(Parser *parser) {
350 assert(parser);
351 parser->previous = parser->current;
352 if (parser->previous.kind == TOK_EOF) {
353 return;
354 }
355 parser->current = parser->tokens[parser->idx++];
356}
357
358bool
359parse_match(Parser *parser, TokenKind kind) {
360 assert(parser);
361 if (parser->current.kind != kind) {
362 return false;
363 }
364 parse_advance(parser);
365 return true;
366}
367
368static void
369parse_consume(Parser *parser, TokenKind kind, Str msg) {
370 if (parser->current.kind == kind) {
371 parse_advance(parser);
372 return;
373 }
374 parse_emit_err(parser, parser->current, msg);
375}
376
377void
378parse_block(Parser *parser) {
379#if DEBUG == 1
380 print("parsing block ");
381 print_token(parser->previous);
382#endif
383 Node *block = node_alloc(parser, NODE_BLOCK, parser->previous);
384 if (!block) return;
385 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
386 parse_expr(parser, PREC_LOW);
387 Node *next = array_pop(parser->nodes);
388 array_push(block->statements, next, parser->storage);
389 }
390 array_push(parser->nodes, block, parser->storage);
391}
392
393void
394parse_expr(Parser *parser, ParsePrecedence precedence) {
395 parse_advance(parser);
396 // TODO: synchronize panic mode on keywords.
397 if (parser->panic) return;
398 ParseFn prefix = parse_rules[parser->previous.kind].prefix;
399 if (prefix == NULL) {
400 parse_emit_err(parser, parser->previous, cstr("expected expression"));
401 return;
402 }
403 prefix(parser);
404
405 while (precedence <= parse_rules[parser->current.kind].precedence) {
406 parse_advance(parser);
407 ParseFn infix = parse_rules[parser->previous.kind].infix;
408 if (infix == NULL) {
409 parse_emit_err(parser, parser->previous,
410 cstr("expected expression"));
411 return;
412 }
413 infix(parser);
414 }
415}
416
417void
418parse_unary(Parser *parser) {
419 if (parser->panic) return;
420 Token prev = parser->previous;
421#if DEBUG == 1
422 print("parsing unary ");
423 print_token(prev);
424#endif
425 parse_expr(parser, PREC_UNARY);
426 Node *node = NULL;
427 switch (prev.kind) {
428 case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break;
429 case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break;
430 default: break; // Unreachable.
431 }
432 if (!node) return;
433 node->left = array_pop(parser->nodes);
434 array_push(parser->nodes, node, parser->storage);
435}
436
437void
438parse_literal(Parser *parser) {
439 if (parser->panic) return;
440 Token prev = parser->previous;
441#if DEBUG == 1
442 print("parsing literal ");
443 print_token(prev);
444#endif
445 Node *node = NULL;
446 switch (prev.kind) {
447 case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break;
448 case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break;
449 case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break;
450 default: return; // Unreachable.
451 }
452 if (!node) return;
453 array_push(parser->nodes, node, parser->storage);
454}
455
456void
457parse_type(Parser *parser) {
458 Token prev = parser->previous;
459#if DEBUG == 1
460 print("parsing type ");
461 print_token(prev);
462#endif
463 Node *node = node_alloc(parser, NODE_TYPE, prev);
464 if (!node) return;
465 if (parse_match(parser, TOK_AT)) {
466 node->is_ptr = true;
467 }
468 if (parse_match(parser, TOK_LCURLY)) {
469 node->kind = NODE_COMPOUND_TYPE;
470 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
471 parse_consume(parser, TOK_SYMBOL, cstr("expected name"));
472 parse_symbol(parser);
473 Node *next = array_pop(parser->nodes);
474 parse_consume(parser, TOK_COLON, cstr("incomplete type"));
475 parse_type(parser);
476 next->next = array_pop(parser->nodes);
477 array_push(node->elements, next, parser->storage);
478 }
479 } else {
480 parse_consume(parser, TOK_SYMBOL,
481 cstr("no type given for struct field"));
482 node->value.sym = parser->previous.val;
483 // Optional array value?
484 if (parse_match(parser, TOK_LSQUARE)) {
485 node->kind = NODE_ARR_TYPE,
486 parse_consume(parser, TOK_NUM_INT, cstr("no array size given"));
487 parse_number(parser);
488 node->arr_size = array_pop(parser->nodes);
489 parse_consume(parser, TOK_RSQUARE,
490 cstr("unmatched brackets ']' in array type"));
491 }
492 }
493 array_push(parser->nodes, node, parser->storage);
494}
495
496void
497parse_keyword(Parser *parser) {
498 if (parser->panic) return;
499 Token prev = parser->previous;
500#if DEBUG == 1
501 print("parsing keyword ");
502 print_token(prev);
503#endif
504 Node *node = NULL;
505 switch (prev.kind) {
506 case TOK_LET: {
507 node = node_alloc(parser, NODE_LET, prev);
508 if (!node) return;
509 parse_consume(parser, TOK_SYMBOL,
510 cstr("expected symbol name on let expression"));
511 parse_symbol(parser);
512 node->var_name = array_pop(parser->nodes);
513 if (node->var_name->next) {
514 parse_emit_err(parser, prev,
515 cstr("invalid symbol name in let expression"));
516 return;
517 }
518
519 // Optional type declaration.
520 if (parse_match(parser, TOK_COLON)) {
521 parse_type(parser);
522 node->var_type = array_pop(parser->nodes);
523 }
524
525 // Optional assignment.
526 if (parse_match(parser, TOK_ASSIGN)) {
527 parse_expr(parser, PREC_LOW);
528 node->var_val = array_pop(parser->nodes);
529 }
530 } break;
531 case TOK_SET: {
532 node = node_alloc(parser, NODE_SET, prev);
533 if (!node) return;
534 parse_consume(parser, TOK_SYMBOL,
535 cstr("expected symbol name on let expression"));
536 parse_symbol(parser);
537 node->var_name = array_pop(parser->nodes);
538 parse_consume(parser, TOK_ASSIGN,
539 cstr("expected assignment on set expression"));
540 parse_expr(parser, PREC_LOW);
541 node->var_val = array_pop(parser->nodes);
542 } break;
543 case TOK_STRUCT: {
544 node = node_alloc(parser, NODE_STRUCT, prev);
545 if (!node) return;
546 parse_consume(parser, TOK_SYMBOL,
547 cstr("expected symbol name on struct definition"));
548 // Just consume this to avoid conflicts with struct literals.
549 node->value.sym = parser->previous.val;
550
551 parse_consume(parser, TOK_LCURLY,
552 cstr("expected '{' on struct definition"));
553
554 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
555 Node *field =
556 node_alloc(parser, NODE_VAL_FIELD, parser->current);
557 if (!field) return;
558 parse_consume(
559 parser, TOK_SYMBOL,
560 cstr("expected symbol name on struct definition"));
561 parse_symbol(parser);
562 field->field_name = array_pop(parser->nodes);
563 if (field->field_name->next ||
564 field->field_name->kind == NODE_SYMBOL_IDX) {
565 parse_emit_err(parser, prev,
566 cstr("invalid symbol name in struct field"));
567 return;
568 }
569 parse_consume(
570 parser, TOK_COLON,
571 cstr("expected symbol name on struct definition"));
572 parse_type(parser);
573 field->field_type = array_pop(parser->nodes);
574
575 // Optional assignment.
576 if (parse_match(parser, TOK_ASSIGN)) {
577 parse_expr(parser, PREC_LOW);
578 field->field_val = array_pop(parser->nodes);
579 }
580
581 array_push(node->struct_field, field, parser->storage);
582 }
583 } break;
584 case TOK_IF: {
585 node = node_alloc(parser, NODE_IF, prev);
586 if (!node) return;
587 parse_consume(parser, TOK_LPAREN,
588 cstr("expected '(' on if expression"));
589 parse_expr(parser, PREC_LOW);
590 node->cond_if = array_pop(parser->nodes);
591 parse_consume(parser, TOK_RPAREN,
592 cstr("expected ')' on if expression"));
593 parse_expr(parser, PREC_LOW);
594 node->cond_expr = array_pop(parser->nodes);
595 if (parse_match(parser, TOK_ELSE)) {
596 parse_expr(parser, PREC_LOW);
597 node->cond_else = array_pop(parser->nodes);
598 }
599 } break;
600 case TOK_MATCH: {
601 node = node_alloc(parser, NODE_MATCH, prev);
602 if (!node) return;
603 parse_consume(parser, TOK_LPAREN,
604 cstr("expected '(' on if expression"));
605 parse_expr(parser, PREC_LOW);
606 node->match_expr = array_pop(parser->nodes);
607 parse_consume(parser, TOK_RPAREN,
608 cstr("expected ')' on if expression"));
609 parse_consume(parser, TOK_LCURLY,
610 cstr("expected block of match cases"));
611 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
612 Node *tmp = node_alloc(parser, NODE_CASE, parser->previous);
613 if (!tmp) return;
614 // Are we on the default case.
615 if (!parse_match(parser, TOK_ELSE)) {
616 parse_consume(parser, TOK_CASE,
617 cstr("expected case statement"));
618 parse_expr(parser, PREC_LOW);
619 tmp->case_value = array_pop(parser->nodes);
620 }
621 parse_consume(parser, TOK_ASSIGN,
622 cstr("malformed case statement"));
623 parse_expr(parser, PREC_LOW);
624 tmp->case_expr = array_pop(parser->nodes);
625 array_push(node->match_cases, tmp, parser->storage);
626 }
627 // TODO: Check that we only have literals on the match case, this
628 // could be done on the analysis step, but also here...
629 // TODO: Check that there are no multiple default or duplicated
630 // cases.
631 } break;
632 case TOK_ENUM: {
633 node = node_alloc(parser, NODE_ENUM, prev);
634 if (!node) return;
635 parse_consume(parser, TOK_SYMBOL,
636 cstr("expected symbol name on enum definition"));
637 // Just consume this to avoid conflicts with struct literals.
638 node->value.sym = parser->previous.val;
639 parse_consume(parser, TOK_LCURLY,
640 cstr("expected '{' on enum definition"));
641 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
642 Node *field =
643 node_alloc(parser, NODE_VAL_FIELD, parser->current);
644 if (!field) return;
645 parse_consume(parser, TOK_SYMBOL,
646 cstr("expected symbol name on enum definition"));
647 parse_symbol(parser);
648 field->field_name = array_pop(parser->nodes);
649 if (field->field_name->next ||
650 field->field_name->kind == NODE_SYMBOL_IDX) {
651 parse_emit_err(parser, prev,
652 cstr("invalid symbol name in enum field"));
653 return;
654 }
655 if (parse_match(parser, TOK_ASSIGN)) {
656 parse_expr(parser, PREC_LOW);
657 field->field_val = array_pop(parser->nodes);
658 }
659 array_push(node->struct_field, field, parser->storage);
660 }
661 } break;
662 case TOK_COND: {
663 node = node_alloc(parser, NODE_COND, prev);
664 if (!node) return;
665 parse_consume(parser, TOK_LCURLY,
666 cstr("expected '{' on cond expression"));
667 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
668 Node *tmp = node_alloc(parser, NODE_CASE, parser->previous);
669 if (!tmp) return;
670 // Are we on the default case.
671 if (!parse_match(parser, TOK_ELSE)) {
672 parse_consume(parser, TOK_CASE,
673 cstr("expected case statement"));
674 parse_expr(parser, PREC_LOW);
675 tmp->case_value = array_pop(parser->nodes);
676 }
677 parse_consume(parser, TOK_ASSIGN,
678 cstr("malformed case statement"));
679 parse_expr(parser, PREC_LOW);
680 tmp->case_expr = array_pop(parser->nodes);
681 array_push(node->match_cases, tmp, parser->storage);
682 }
683 } break;
684 case TOK_BREAK: {
685 node = node_alloc(parser, NODE_BREAK, prev);
686 if (!node) return;
687 } break;
688 case TOK_CONTINUE: {
689 node = node_alloc(parser, NODE_CONTINUE, prev);
690 if (!node) return;
691 } break;
692 case TOK_WHILE: {
693 node = node_alloc(parser, NODE_WHILE, prev);
694 if (!node) return;
695 parse_consume(parser, TOK_LPAREN,
696 cstr("expected '(' on while expression"));
697 parse_expr(parser, PREC_LOW);
698 node->while_cond = array_pop(parser->nodes);
699 parse_consume(parser, TOK_RPAREN,
700 cstr("expected ')' on while expression"));
701 parse_expr(parser, PREC_LOW);
702 node->while_expr = array_pop(parser->nodes);
703 } break;
704 default: return; // Unreachable.
705 }
706 array_push(parser->nodes, node, parser->storage);
707}
708
709void
710parse_binary(Parser *parser) {
711 if (parser->panic) return;
712 Token prev = parser->previous;
713#if DEBUG == 1
714 print("parsing binary ");
715 print_token(prev);
716#endif
717 ParseRule rule = parse_rules[prev.kind];
718 parse_expr(parser, rule.precedence + 1);
719
720 Node *node;
721 switch (prev.kind) {
722 case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break;
723 case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break;
724 case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break;
725 case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break;
726 case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break;
727 case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break;
728 case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break;
729 case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break;
730 case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break;
731 case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break;
732 case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break;
733 case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break;
734 case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break;
735 case TOK_BITAND: {
736 node = node_alloc(parser, NODE_BITAND, prev);
737 } break;
738 case TOK_BITOR: {
739 node = node_alloc(parser, NODE_BITOR, prev);
740 } break;
741 case TOK_BITLSHIFT: {
742 node = node_alloc(parser, NODE_BITLSHIFT, prev);
743 } break;
744 case TOK_BITRSHIFT: {
745 node = node_alloc(parser, NODE_BITRSHIFT, prev);
746 } break;
747 default: {
748 parse_emit_err(parser, prev, cstr("unreachable"));
749 return;
750 }
751 }
752 if (!node) return;
753 node->right = array_pop(parser->nodes);
754 node->left = array_pop(parser->nodes);
755 array_push(parser->nodes, node, parser->storage);
756}
757
758void
759parse_number(Parser *parser) {
760 if (parser->panic) return;
761 Token prev = parser->previous;
762#if DEBUG == 1
763 print("parsing number ");
764 print_token(prev);
765#endif
766 Node *node = NULL;
767 switch (prev.kind) {
768 case TOK_NUM_INT: {
769 if (str_has_prefix(prev.val, cstr("0x")) ||
770 str_has_prefix(prev.val, cstr("0b"))) {
771 node = node_alloc(parser, NODE_NUM_UINT, prev);
772 if (!node) return;
773 node->value.u = str_to_uint(prev.val);
774 } else {
775 node = node_alloc(parser, NODE_NUM_INT, prev);
776 if (!node) return;
777 node->value.i = str_to_int(prev.val);
778 }
779 } break;
780 case TOK_NUM_FLOAT: {
781 node = node_alloc(parser, NODE_NUM_FLOAT, prev);
782 if (!node) return;
783 node->value.d = str_to_float(prev.val);
784 } break;
785 case TOK_CHAR: {
786 node = node_alloc(parser, NODE_NUM_INT, prev);
787 if (!node) return;
788 node->value.i = prev.val.mem[1];
789 } break;
790 default: break;
791 }
792 array_push(parser->nodes, node, parser->storage);
793}
794
795void
796parse_string(Parser *parser) {
797 if (parser->panic) return;
798 Token prev = parser->previous;
799#if DEBUG == 1
800 print("parsing string ");
801 print_token(prev);
802#endif
803 Node *node = node_alloc(parser, NODE_STRING, prev);
804 if (!node) return;
805 node->value.str = str_remove_prefix(prev.val, cstr("\""));
806 node->value.str = str_remove_suffix(node->value.str, cstr("\""));
807 array_push(parser->nodes, node, parser->storage);
808}
809
810void
811parse_symbol(Parser *parser) {
812 if (parser->panic) return;
813 Token prev = parser->previous;
814#if DEBUG == 1
815 print("parsing symbol ");
816 print_token(prev);
817#endif
818 if (prev.kind == TOK_AT) {
819 parse_consume(parser, TOK_SYMBOL,
820 cstr("expected symbol after '.' operator"));
821 parse_symbol(parser);
822 Node *node = array_pop(parser->nodes);
823 if (node) {
824 node->is_ptr = true;
825 array_push(parser->nodes, node, parser->storage);
826 }
827 return;
828 }
829 Node *node = node_alloc(parser, NODE_SYMBOL, prev);
830 if (!node) return;
831 if (parse_match(parser, TOK_DOT)) {
832 // Symbol chain.
833 parse_consume(parser, TOK_SYMBOL,
834 cstr("expected symbol after '.' operator"));
835 parse_symbol(parser);
836 node->next = array_pop(parser->nodes);
837 } else if (parse_match(parser, TOK_LCURLY)) {
838 // Struct literal.
839 node->kind = NODE_STRUCT_LIT;
840 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
841 Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current);
842 parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name"));
843 parse_symbol(parser);
844 field->field_name = array_pop(parser->nodes);
845 parse_consume(parser, TOK_ASSIGN, cstr("expected assignment"));
846 parse_expr(parser, PREC_LOW);
847 field->field_type = array_pop(parser->nodes);
848 array_push(node->elements, field, parser->storage);
849 }
850 } else if (parse_match(parser, TOK_LSQUARE)) {
851 node->kind = NODE_SYMBOL_IDX;
852 parse_consume(parser, TOK_NUM_INT, cstr("no array size given"));
853 parse_number(parser);
854 node->arr_size = array_pop(parser->nodes);
855 parse_consume(parser, TOK_RSQUARE,
856 cstr("unmatched brackets ']' in array type"));
857 if (parse_match(parser, TOK_DOT)) {
858 // Symbol chain.
859 parse_consume(parser, TOK_SYMBOL,
860 cstr("expected symbol after '.' operator"));
861 parse_symbol(parser);
862 node->next = array_pop(parser->nodes);
863 }
864 } else {
865 node = node_alloc(parser, NODE_SYMBOL, prev);
866 if (!node) return;
867 }
868 node->value.sym = prev.val;
869 array_push(parser->nodes, node, parser->storage);
870}
871
872void
873parse_grouping(Parser *parser) {
874 if (parser->panic) return;
875#if DEBUG == 1
876 print("parsing group ");
877 print_token(parser->previous);
878#endif
879 parse_expr(parser, PREC_LOW);
880 parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression"));
881}
882
883void
884graph_node(Node *node) {
885 if (node == NULL) return;
886 print("%d [width=2.5,shape=Mrecord,label=\"", node->id);
887 print("{ <type> %s | { L: %d | C: %d } } ", node_str[node->kind],
888 node->line, node->col);
889 switch (node->kind) {
890 case NODE_NUM_INT: print("| Value: %d", node->value.i); break;
891 case NODE_NUM_UINT: print("| Value: %x", node->value.u); break;
892 case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break;
893 case NODE_STRING: print("| Value: %s", node->value.str); break;
894 case NODE_SYMBOL_IDX:
895 case NODE_STRUCT:
896 case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break;
897 case NODE_SYMBOL:
898 case NODE_TYPE: {
899 if (node->is_ptr) {
900 print("| Name: @%s", node->value.sym);
901 } else {
902 print("| Name: %s", node->value.sym);
903 }
904 } break;
905 default: break;
906 }
907 println("\"];");
908
909 switch (node->kind) {
910 case NODE_COND:
911 case NODE_MATCH: {
912 if (node->match_expr) {
913 println("%d:e->%d:w;", node->id, node->match_expr->id);
914 graph_node(node->match_expr);
915 }
916 for (sz i = 0; i < array_size(node->match_cases); i++) {
917 Node *next = node->match_cases[i];
918 println("%d:e->%d:w;", node->id, next->id);
919 graph_node(next);
920 }
921 } break;
922 case NODE_BLOCK:
923 case NODE_STRUCT_LIT:
924 case NODE_COMPOUND_TYPE: {
925 for (sz i = 0; i < array_size(node->elements); i++) {
926 Node *next = node->elements[i];
927 println("%d:e->%d:w;", node->id, next->id);
928 graph_node(next);
929 }
930 } break;
931 case NODE_ENUM:
932 case NODE_STRUCT: {
933 for (sz i = 0; i < array_size(node->struct_field); i++) {
934 Node *next = node->struct_field[i];
935 println("%d:e->%d:w;", node->id, next->id);
936 graph_node(next);
937 }
938 } break;
939 case NODE_IF: {
940 if (node->cond_if) {
941 println("%d:e->%d:w;", node->id, node->cond_if->id);
942 graph_node(node->cond_if);
943 }
944 if (node->cond_expr) {
945 println("%d:e->%d:w;", node->id, node->cond_expr->id);
946 graph_node(node->cond_expr);
947 }
948 if (node->cond_else) {
949 println("%d:e->%d:w;", node->id, node->cond_else->id);
950 graph_node(node->cond_else);
951 }
952 } break;
953 case NODE_VAL_FIELD:
954 case NODE_SET:
955 case NODE_LET: {
956 if (node->var_name) {
957 println("%d:e->%d:w;", node->id, node->var_name->id);
958 graph_node(node->var_name);
959 }
960 if (node->var_type) {
961 println("%d:e->%d:w;", node->id, node->var_type->id);
962 graph_node(node->var_type);
963 }
964 if (node->var_val) {
965 println("%d:e->%d:w;", node->id, node->var_val->id);
966 graph_node(node->var_val);
967 }
968 } break;
969 default: {
970 if (node->left) {
971 println("%d:e->%d:w;", node->id, node->left->id);
972 graph_node(node->left);
973 }
974 if (node->right) {
975 println("%d:e->%d:w;", node->id, node->right->id);
976 graph_node(node->right);
977 }
978 } break;
979 }
980}
981
982void
983graph_ast(Node **roots) {
984 if (roots == NULL) return;
985 println("digraph ast {");
986 println("rankdir=LR;");
987 println("ranksep=\"0.95 equally\";");
988 println("nodesep=\"0.5 equally\";");
989 println("overlap=scale;");
990 println("bgcolor=\"transparent\";");
991 for (sz i = 0; i < array_size(roots); ++i) {
992 println("subgraph %d {", i);
993 Node *root = roots[array_size(roots) - 1 - i];
994 graph_node(root);
995 println("}");
996 }
997 println("}");
998}
999
1000void
1001process_file(Str path) { 25process_file(Str path) {
1002 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); 26 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
1003 27
diff --git a/src/parser.c b/src/parser.c
index 3f15b47..84f3424 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,488 +1,970 @@
1#include "parser.h" 1#ifndef PARSER_C
2#include "darray.h" 2#define PARSER_C
3
4#include "badlib.h"
5#include "lexer.c"
6
7//
8// Nodes
9//
10
11typedef enum NodeKind {
12 // Arithmetic.
13 NODE_ADD,
14 NODE_SUB,
15 NODE_DIV,
16 NODE_MUL,
17 NODE_MOD,
18 // Logical.
19 NODE_NOT,
20 NODE_AND,
21 NODE_OR,
22 NODE_EQ,
23 NODE_NEQ,
24 NODE_LT,
25 NODE_GT,
26 NODE_LE,
27 NODE_GE,
28 // Bitwise ops.
29 NODE_BITNOT,
30 NODE_BITAND,
31 NODE_BITOR,
32 NODE_BITLSHIFT,
33 NODE_BITRSHIFT,
34 // Literals.
35 NODE_STRING,
36 NODE_SYMBOL,
37 NODE_NUM_INT,
38 NODE_NUM_UINT,
39 NODE_NUM_FLOAT,
40 NODE_TRUE,
41 NODE_FALSE,
42 NODE_NIL,
43 NODE_STRUCT_LIT,
44 // Keywords.
45 NODE_LET,
46 NODE_SET,
47 NODE_STRUCT,
48 NODE_IF,
49 NODE_MATCH,
50 NODE_CASE,
51 NODE_COND,
52 NODE_ENUM,
53 NODE_BREAK,
54 NODE_CONTINUE,
55 NODE_WHILE,
56 // Helpers.
57 NODE_SYMBOL_IDX,
58 NODE_TYPE,
59 NODE_ARR_TYPE,
60 NODE_COMPOUND_TYPE,
61 NODE_VAL_FIELD,
62 NODE_BLOCK,
63} NodeKind;
64
65Str node_str[] = {
66 // Arithmetic.
67 [NODE_ADD] = cstr("ADD"),
68 [NODE_SUB] = cstr("SUB"),
69 [NODE_DIV] = cstr("DIV"),
70 [NODE_MUL] = cstr("MUL"),
71 [NODE_MOD] = cstr("MOD"),
72 // Logical.
73 [NODE_NOT] = cstr("NOT"),
74 [NODE_AND] = cstr("AND"),
75 [NODE_OR] = cstr("OR"),
76 [NODE_EQ] = cstr("EQ"),
77 [NODE_NEQ] = cstr("NEQ"),
78 [NODE_LT] = cstr("LT"),
79 [NODE_GT] = cstr("GT"),
80 [NODE_LE] = cstr("LE"),
81 [NODE_GE] = cstr("GE"),
82 // Bitwise ops.
83 [NODE_BITNOT] = cstr("BITNOT"),
84 [NODE_BITAND] = cstr("BITAND"),
85 [NODE_BITOR] = cstr("BITOR"),
86 [NODE_BITLSHIFT] = cstr("BITLSHIFT"),
87 [NODE_BITRSHIFT] = cstr("BITRSHIFT"),
88 // Literals.
89 [NODE_STRING] = cstr("STRING"),
90 [NODE_SYMBOL] = cstr("SYMBOL"),
91 [NODE_NUM_INT] = cstr("INT"),
92 [NODE_NUM_UINT] = cstr("UINT"),
93 [NODE_NUM_FLOAT] = cstr("FLOAT"),
94 [NODE_TRUE] = cstr("TRUE"),
95 [NODE_FALSE] = cstr("FALSE"),
96 [NODE_NIL] = cstr("NIL"),
97 [NODE_STRUCT_LIT] = cstr("STRUCT LIT"),
98 // Keywords.
99 [NODE_LET] = cstr("LET"),
100 [NODE_SET] = cstr("SET"),
101 [NODE_STRUCT] = cstr("STRUCT DEF"),
102 [NODE_IF] = cstr("IF"),
103 [NODE_MATCH] = cstr("MATCH"),
104 [NODE_CASE] = cstr("CASE"),
105 [NODE_COND] = cstr("COND"),
106 [NODE_ENUM] = cstr("ENUM"),
107 [NODE_BREAK] = cstr("BREAK"),
108 [NODE_CONTINUE] = cstr("CONTINUE"),
109 [NODE_WHILE] = cstr("WHILE"),
110 // Helpers.
111 [NODE_TYPE] = cstr("TYPE"),
112 [NODE_ARR_TYPE] = cstr("TYPE (ARR)"),
113 [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"),
114 [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"),
115 [NODE_VAL_FIELD] = cstr("FIELD"),
116 [NODE_BLOCK] = cstr("BLOCK"),
117};
118
119typedef struct Node {
120 sz id;
121 sz line;
122 sz col;
123
124 NodeKind kind;
125 union {
126 f64 d;
127 sz i;
128 u64 u;
129 Str str;
130 Str sym;
131 } value;
132 union {
133 struct {
134 struct Node *left;
135 struct Node *right;
136 };
137 struct {
138 struct Node *next;
139 struct Node *arr_size;
140 };
141 struct {
142 struct Node *var_name;
143 struct Node *var_type;
144 struct Node *var_val;
145 };
146 struct {
147 struct Node *while_cond;
148 struct Node *while_expr;
149 };
150 struct {
151 struct Node *cond_if;
152 struct Node *cond_expr;
153 struct Node *cond_else;
154 };
155 struct {
156 struct Node *field_name;
157 struct Node *field_type;
158 struct Node *field_val;
159 };
160 struct {
161 struct Node *match_expr;
162 struct Node **match_cases;
163 };
164 struct {
165 struct Node *case_value;
166 struct Node *case_expr;
167 };
168 struct Node **struct_field;
169 struct Node **elements;
170 struct Node **statements;
171 };
172 bool is_ptr;
173} Node;
174
175//
176// Parsing.
177//
178
179typedef struct Parser {
180 Token current;
181 Token previous;
182 Token *tokens;
183 sz idx;
184
185 // Error handling.
186 bool err;
187 bool panic;
188 Str file_name;
189
190 // Storage.
191 Node **nodes;
192 Arena *storage;
193} Parser;
194
195typedef enum {
196 PREC_NONE = 0,
197 PREC_LOW, // lowest precedence
198 PREC_BITLOGIC, // & |
199 PREC_BITSHIFT, // << >>
200 PREC_OR, // ||
201 PREC_AND, // &&
202 PREC_EQUALITY, // == !=
203 PREC_COMPARISON, // < > <= >=
204 PREC_TERM, // + -
205 PREC_FACTOR, // * / %
206 PREC_UNARY, // ! -
207 PREC_CALL, // . ()
208 PREC_PRIMARY // highest precedence
209} ParsePrecedence;
210
211typedef void (*ParseFn)(Parser *);
212
213typedef struct {
214 ParseFn prefix;
215 ParseFn infix;
216 ParsePrecedence precedence;
217} ParseRule;
218
219void parse_expr(Parser *parser, ParsePrecedence precedence);
220void parse_advance(Parser *parser);
221bool parse_match(Parser *parser, TokenKind kind);
222
223void parse_grouping(Parser *parser);
224void parse_unary(Parser *parser);
225void parse_binary(Parser *parser);
226void parse_number(Parser *parser);
227void parse_literal(Parser *parser);
228void parse_string(Parser *parser);
229void parse_symbol(Parser *parser);
230void parse_keyword(Parser *parser);
231void parse_type(Parser *parser);
232void parse_block(Parser *parser);
233
234ParseRule parse_rules[] = {
235 [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE},
236 [TOK_LCURLY] = {parse_block, NULL, PREC_NONE},
237 [TOK_AT] = {parse_symbol, NULL, PREC_NONE},
238
239 // Arithmetic.
240 [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM},
241 [TOK_ADD] = {NULL, parse_binary, PREC_TERM},
242 [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR},
243 [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR},
244 [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR},
245
246 // Logical.
247 [TOK_NOT] = {parse_unary, NULL, PREC_NONE},
248 [TOK_AND] = {NULL, parse_binary, PREC_AND},
249 [TOK_OR] = {NULL, parse_binary, PREC_OR},
250 [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY},
251 [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY},
252 [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON},
253 [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON},
254 [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON},
255 [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON},
256
257 // Bitwise ops.
258 [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE},
259 [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC},
260 [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC},
261 [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT},
262 [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT},
263
264 // Literals.
265 [TOK_STRING] = {parse_string, NULL, PREC_NONE},
266 [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE},
267 [TOK_CHAR] = {parse_number, NULL, PREC_NONE},
268 [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE},
269 [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE},
270 [TOK_TRUE] = {parse_literal, NULL, PREC_NONE},
271 [TOK_FALSE] = {parse_literal, NULL, PREC_NONE},
272 [TOK_NIL] = {parse_literal, NULL, PREC_NONE},
273
274 // Statements.
275 [TOK_LET] = {parse_keyword, NULL, PREC_NONE},
276 [TOK_SET] = {parse_keyword, NULL, PREC_NONE},
277 [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE},
278 [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE},
279 [TOK_IF] = {parse_keyword, NULL, PREC_NONE},
280 [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE},
281 [TOK_COND] = {parse_keyword, NULL, PREC_NONE},
282 [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE},
283 [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE},
284 [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE},
285 [TOK_FUN] = {parse_keyword, NULL, PREC_NONE},
286 [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE},
287
288 [TOK_EOF] = {NULL, NULL, PREC_NONE},
289};
3 290
4Token 291Node *
5next_token(Parser *parser) { 292node_alloc(Parser *parser, NodeKind kind, Token tok) {
6 return parser->tokens[parser->current_token++]; 293 if (parser->panic) return NULL;
294 static sz id = 0;
295 Node *node = arena_calloc((sz)sizeof(Node), parser->storage);
296 node->id = id++;
297 node->kind = kind;
298 node->line = tok.line;
299 node->col = tok.col;
300 return node;
7} 301}
8 302
9Token 303void
10peek_token(Parser *parser) { 304parse_emit_err(Parser *parser, Token token, Str msg) {
11 return parser->tokens[parser->current_token]; 305 if (parser->panic) return;
12} 306 parser->panic = true;
307 parser->err = true;
308 eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col,
309 msg);
13 310
14bool 311 if (token.kind == TOK_EOF) {
15has_next(Parser *parser) { 312 eprintln(" at end of the file");
16 return parser->current_token < array_size(parser->tokens); 313 } else if (token.kind != TOK_UNKNOWN) {
314 eprintln(" at %s", token.val);
315 }
17} 316}
18 317
19bool 318void
20consume_rparen(Parser *parser) { 319parse_advance(Parser *parser) {
21 Token tok = next_token(parser); 320 assert(parser);
22 if (tok.type != TOKEN_RPAREN) { 321 parser->previous = parser->current;
23 push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); 322 if (parser->previous.kind == TOK_EOF) {
24 return false; 323 return;
25 } 324 }
26 return true; 325 parser->current = parser->tokens[parser->idx++];
27} 326}
28 327
29bool 328bool
30consume_lparen(Parser *parser) { 329parse_match(Parser *parser, TokenKind kind) {
31 Token tok = next_token(parser); 330 assert(parser);
32 if (tok.type != TOKEN_LPAREN) { 331 if (parser->current.kind != kind) {
33 push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col);
34 return false; 332 return false;
35 } 333 }
334 parse_advance(parser);
36 return true; 335 return true;
37} 336}
38 337
39Node * 338static void
40parse_number(Parser *parser) { 339parse_consume(Parser *parser, TokenKind kind, Str msg) {
41 Token tok = next_token(parser); 340 if (parser->current.kind == kind) {
42 if (tok.type != TOKEN_NUMBER) { 341 parse_advance(parser);
43 push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); 342 return;
44 return NULL;
45 }
46
47 bool negative = false;
48 int base = 10;
49 char c = sv_next(&tok.value);
50 if (c == '-') {
51 negative = true;
52 c = sv_next(&tok.value);
53 }
54 if (c == '+') {
55 c = sv_next(&tok.value);
56 }
57 if (c == '0' && sv_peek(&tok.value) != '\0') {
58 c = sv_next(&tok.value);
59 if (c == 'x') {
60 base = 16;
61 c = sv_next(&tok.value);
62 } else if (c == 'b') {
63 base = 2;
64 c = sv_next(&tok.value);
65 } else if (!(c >= '0' && c <= '9')){
66 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
67 return NULL;
68 }
69 }
70
71 // Integral part.
72 u64 integral = 0;
73 while (c != '\0') {
74 ssize_t current = 0;
75 if (c >= 'a' && c <= 'z' && base == 16) {
76 current = (c - 'a') + 10;
77 } else if (c >= 'A' && c <= 'Z' && base == 16) {
78 current = (c - 'A') + 10;
79 } else if (c >= '0' && c <= '9') {
80 current = (c - '0');
81 } else if (c == '.') {
82 c = sv_next(&tok.value);
83 break;
84 } else {
85 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
86 return NULL;
87 }
88 integral = integral * base + current;
89 c = sv_next(&tok.value);
90 }
91
92 // Fractional part.
93 u64 fractional = 0;
94 while (c != '\0') {
95 ssize_t current = 0;
96 if (c >= 'a' && c <= 'z' && base == 16) {
97 current = (c - 'a') + 10;
98 } else if (c >= 'A' && c <= 'Z' && base == 16) {
99 current = (c - 'A') + 10;
100 } else if (c >= '0' && c <= '9') {
101 current = (c - '0');
102 } else {
103 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
104 return NULL;
105 }
106 fractional = fractional * base + current;
107 c = sv_next(&tok.value);
108 } 343 }
109 344 parse_emit_err(parser, parser->current, msg);
110 Node * node = alloc_node(NODE_NUMBER);
111 node->number.negative = negative;
112 node->number.integral = integral;
113 node->number.fractional = fractional;
114 node->line = tok.line;
115 node->col = tok.col;
116 return node;
117} 345}
118 346
119Node * 347void
120parse_string(Parser *parser) { 348parse_block(Parser *parser) {
121 Token tok = next_token(parser); 349#if DEBUG == 1
122 if (tok.type != TOKEN_STRING) { 350 print("parsing block ");
123 push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); 351 print_token(parser->previous);
124 return NULL; 352#endif
125 } 353 Node *block = node_alloc(parser, NODE_BLOCK, parser->previous);
126 Node *node = alloc_node(NODE_STRING); 354 if (!block) return;
127 node->string = tok.value; 355 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
128 node->line = tok.line; 356 parse_expr(parser, PREC_LOW);
129 node->col = tok.col; 357 Node *next = array_pop(parser->nodes);
130 return node; 358 array_push(block->statements, next, parser->storage);
359 }
360 array_push(parser->nodes, block, parser->storage);
131} 361}
132 362
133Node * 363void
134parse_symbol(Parser *parser) { 364parse_expr(Parser *parser, ParsePrecedence precedence) {
135 Token tok = next_token(parser); 365 parse_advance(parser);
136 if (tok.type != TOKEN_SYMBOL) { 366 // TODO: synchronize panic mode on keywords.
137 push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); 367 if (parser->panic) return;
138 return NULL; 368 ParseFn prefix = parse_rules[parser->previous.kind].prefix;
369 if (prefix == NULL) {
370 parse_emit_err(parser, parser->previous, cstr("expected expression"));
371 return;
372 }
373 prefix(parser);
374
375 while (precedence <= parse_rules[parser->current.kind].precedence) {
376 parse_advance(parser);
377 ParseFn infix = parse_rules[parser->previous.kind].infix;
378 if (infix == NULL) {
379 parse_emit_err(parser, parser->previous,
380 cstr("expected expression"));
381 return;
382 }
383 infix(parser);
139 } 384 }
140 Node *node = alloc_node(NODE_SYMBOL);
141 node->string = tok.value;
142 node->line = tok.line;
143 node->col = tok.col;
144 return node;
145} 385}
146 386
147Node * 387void
148parse_type(Parser *parser) { 388parse_unary(Parser *parser) {
149 Token tok = next_token(parser); 389 if (parser->panic) return;
150 if (tok.type != TOKEN_COLON) { 390 Token prev = parser->previous;
151 push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); 391#if DEBUG == 1
152 return NULL; 392 print("parsing unary ");
153 } 393 print_token(prev);
154 Node *node = alloc_node(NODE_TYPE); 394#endif
155 node->line = tok.line; 395 parse_expr(parser, PREC_UNARY);
156 node->col = tok.col; 396 Node *node = NULL;
157 tok = next_token(parser); 397 switch (prev.kind) {
158 if (tok.type != TOKEN_SYMBOL) { 398 case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break;
159 push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); 399 case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break;
160 return NULL; 400 default: break; // Unreachable.
161 } 401 }
162 node->string = tok.value; 402 if (!node) return;
163 return node; 403 node->left = array_pop(parser->nodes);
404 array_push(parser->nodes, node, parser->storage);
164} 405}
165 406
166Node * 407void
167parse_bool(Parser *parser) { 408parse_literal(Parser *parser) {
168 Token tok = next_token(parser); 409 if (parser->panic) return;
169 if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { 410 Token prev = parser->previous;
170 push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); 411#if DEBUG == 1
171 return NULL; 412 print("parsing literal ");
172 } 413 print_token(prev);
173 Node *node = alloc_node(NODE_BOOL); 414#endif
174 node->boolean = tok.type == TOKEN_TRUE; 415 Node *node = NULL;
175 node->line = tok.line; 416 switch (prev.kind) {
176 node->col = tok.col; 417 case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break;
177 return node; 418 case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break;
419 case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break;
420 default: return; // Unreachable.
421 }
422 if (!node) return;
423 array_push(parser->nodes, node, parser->storage);
178} 424}
179 425
180Node * 426void
181parse_builtin(Parser *parser) { 427parse_type(Parser *parser) {
182 Token tok = next_token(parser); 428 Token prev = parser->previous;
183 Node *node = alloc_node(NODE_BUILTIN); 429#if DEBUG == 1
184 node->builtin.type = tok.type; 430 print("parsing type ");
185 node->line = tok.line; 431 print_token(prev);
186 node->col = tok.col; 432#endif
187 array_init(node->builtin.args, 0); 433 Node *node = node_alloc(parser, NODE_TYPE, prev);
188 while (has_next(parser)) { 434 if (!node) return;
189 Token next = peek_token(parser); 435 if (parse_match(parser, TOK_AT)) {
190 if (next.type == TOKEN_RPAREN) { 436 node->is_ptr = true;
191 next_token(parser); 437 }
192 return node; 438 if (parse_match(parser, TOK_LCURLY)) {
439 node->kind = NODE_COMPOUND_TYPE;
440 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
441 parse_consume(parser, TOK_SYMBOL, cstr("expected name"));
442 parse_symbol(parser);
443 Node *next = array_pop(parser->nodes);
444 parse_consume(parser, TOK_COLON, cstr("incomplete type"));
445 parse_type(parser);
446 next->next = array_pop(parser->nodes);
447 array_push(node->elements, next, parser->storage);
193 } 448 }
194 Node *arg = parse_next(parser); 449 } else {
195 if (arg == NULL) { 450 parse_consume(parser, TOK_SYMBOL,
196 break; 451 cstr("no type given for struct field"));
452 node->value.sym = parser->previous.val;
453 // Optional array value?
454 if (parse_match(parser, TOK_LSQUARE)) {
455 node->kind = NODE_ARR_TYPE,
456 parse_consume(parser, TOK_NUM_INT, cstr("no array size given"));
457 parse_number(parser);
458 node->arr_size = array_pop(parser->nodes);
459 parse_consume(parser, TOK_RSQUARE,
460 cstr("unmatched brackets ']' in array type"));
197 } 461 }
198 array_push(node->builtin.args, arg);
199 } 462 }
200 push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); 463 array_push(parser->nodes, node, parser->storage);
201 return NULL;
202} 464}
203 465
204Node * 466void
205parse_funcall(Parser *parser) { 467parse_keyword(Parser *parser) {
206 Token tok = peek_token(parser); 468 if (parser->panic) return;
207 Node *name = parse_symbol(parser); 469 Token prev = parser->previous;
208 if (name == NULL) { 470#if DEBUG == 1
209 return NULL; 471 print("parsing keyword ");
210 } 472 print_token(prev);
211 Node *node = alloc_node(NODE_FUNCALL); 473#endif
212 node->funcall.name = name; 474 Node *node = NULL;
213 node->line = tok.line; 475 switch (prev.kind) {
214 node->col = tok.col; 476 case TOK_LET: {
215 array_init(node->funcall.args, 0); 477 node = node_alloc(parser, NODE_LET, prev);
216 while (has_next(parser)) { 478 if (!node) return;
217 Token next = peek_token(parser); 479 parse_consume(parser, TOK_SYMBOL,
218 if (next.type == TOKEN_RPAREN) { 480 cstr("expected symbol name on let expression"));
219 next_token(parser); 481 parse_symbol(parser);
220 return node; 482 node->var_name = array_pop(parser->nodes);
221 } 483 if (node->var_name->next) {
222 Node *arg = parse_next(parser); 484 parse_emit_err(parser, prev,
223 if (arg == NULL) { 485 cstr("invalid symbol name in let expression"));
224 break; 486 return;
225 } 487 }
226 array_push(node->funcall.args, arg); 488
489 // Optional type declaration.
490 if (parse_match(parser, TOK_COLON)) {
491 parse_type(parser);
492 node->var_type = array_pop(parser->nodes);
493 }
494
495 // Optional assignment.
496 if (parse_match(parser, TOK_ASSIGN)) {
497 parse_expr(parser, PREC_LOW);
498 node->var_val = array_pop(parser->nodes);
499 }
500 } break;
501 case TOK_SET: {
502 node = node_alloc(parser, NODE_SET, prev);
503 if (!node) return;
504 parse_consume(parser, TOK_SYMBOL,
505 cstr("expected symbol name on let expression"));
506 parse_symbol(parser);
507 node->var_name = array_pop(parser->nodes);
508 parse_consume(parser, TOK_ASSIGN,
509 cstr("expected assignment on set expression"));
510 parse_expr(parser, PREC_LOW);
511 node->var_val = array_pop(parser->nodes);
512 } break;
513 case TOK_STRUCT: {
514 node = node_alloc(parser, NODE_STRUCT, prev);
515 if (!node) return;
516 parse_consume(parser, TOK_SYMBOL,
517 cstr("expected symbol name on struct definition"));
518 // Just consume this to avoid conflicts with struct literals.
519 node->value.sym = parser->previous.val;
520
521 parse_consume(parser, TOK_LCURLY,
522 cstr("expected '{' on struct definition"));
523
524 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
525 Node *field =
526 node_alloc(parser, NODE_VAL_FIELD, parser->current);
527 if (!field) return;
528 parse_consume(
529 parser, TOK_SYMBOL,
530 cstr("expected symbol name on struct definition"));
531 parse_symbol(parser);
532 field->field_name = array_pop(parser->nodes);
533 if (field->field_name->next ||
534 field->field_name->kind == NODE_SYMBOL_IDX) {
535 parse_emit_err(parser, prev,
536 cstr("invalid symbol name in struct field"));
537 return;
538 }
539 parse_consume(
540 parser, TOK_COLON,
541 cstr("expected symbol name on struct definition"));
542 parse_type(parser);
543 field->field_type = array_pop(parser->nodes);
544
545 // Optional assignment.
546 if (parse_match(parser, TOK_ASSIGN)) {
547 parse_expr(parser, PREC_LOW);
548 field->field_val = array_pop(parser->nodes);
549 }
550
551 array_push(node->struct_field, field, parser->storage);
552 }
553 } break;
554 case TOK_IF: {
555 node = node_alloc(parser, NODE_IF, prev);
556 if (!node) return;
557 parse_consume(parser, TOK_LPAREN,
558 cstr("expected '(' on if expression"));
559 parse_expr(parser, PREC_LOW);
560 node->cond_if = array_pop(parser->nodes);
561 parse_consume(parser, TOK_RPAREN,
562 cstr("expected ')' on if expression"));
563 parse_expr(parser, PREC_LOW);
564 node->cond_expr = array_pop(parser->nodes);
565 if (parse_match(parser, TOK_ELSE)) {
566 parse_expr(parser, PREC_LOW);
567 node->cond_else = array_pop(parser->nodes);
568 }
569 } break;
570 case TOK_MATCH: {
571 node = node_alloc(parser, NODE_MATCH, prev);
572 if (!node) return;
573 parse_consume(parser, TOK_LPAREN,
574 cstr("expected '(' on if expression"));
575 parse_expr(parser, PREC_LOW);
576 node->match_expr = array_pop(parser->nodes);
577 parse_consume(parser, TOK_RPAREN,
578 cstr("expected ')' on if expression"));
579 parse_consume(parser, TOK_LCURLY,
580 cstr("expected block of match cases"));
581 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
582 Node *tmp = node_alloc(parser, NODE_CASE, parser->previous);
583 if (!tmp) return;
584 // Are we on the default case.
585 if (!parse_match(parser, TOK_ELSE)) {
586 parse_consume(parser, TOK_CASE,
587 cstr("expected case statement"));
588 parse_expr(parser, PREC_LOW);
589 tmp->case_value = array_pop(parser->nodes);
590 }
591 parse_consume(parser, TOK_ASSIGN,
592 cstr("malformed case statement"));
593 parse_expr(parser, PREC_LOW);
594 tmp->case_expr = array_pop(parser->nodes);
595 array_push(node->match_cases, tmp, parser->storage);
596 }
597 // TODO: Check that we only have literals on the match case, this
598 // could be done on the analysis step, but also here...
599 // TODO: Check that there are no multiple default or duplicated
600 // cases.
601 } break;
602 case TOK_ENUM: {
603 node = node_alloc(parser, NODE_ENUM, prev);
604 if (!node) return;
605 parse_consume(parser, TOK_SYMBOL,
606 cstr("expected symbol name on enum definition"));
607 // Just consume this to avoid conflicts with struct literals.
608 node->value.sym = parser->previous.val;
609 parse_consume(parser, TOK_LCURLY,
610 cstr("expected '{' on enum definition"));
611 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
612 Node *field =
613 node_alloc(parser, NODE_VAL_FIELD, parser->current);
614 if (!field) return;
615 parse_consume(parser, TOK_SYMBOL,
616 cstr("expected symbol name on enum definition"));
617 parse_symbol(parser);
618 field->field_name = array_pop(parser->nodes);
619 if (field->field_name->next ||
620 field->field_name->kind == NODE_SYMBOL_IDX) {
621 parse_emit_err(parser, prev,
622 cstr("invalid symbol name in enum field"));
623 return;
624 }
625 if (parse_match(parser, TOK_ASSIGN)) {
626 parse_expr(parser, PREC_LOW);
627 field->field_val = array_pop(parser->nodes);
628 }
629 array_push(node->struct_field, field, parser->storage);
630 }
631 } break;
632 case TOK_COND: {
633 node = node_alloc(parser, NODE_COND, prev);
634 if (!node) return;
635 parse_consume(parser, TOK_LCURLY,
636 cstr("expected '{' on cond expression"));
637 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
638 Node *tmp = node_alloc(parser, NODE_CASE, parser->previous);
639 if (!tmp) return;
640 // Are we on the default case.
641 if (!parse_match(parser, TOK_ELSE)) {
642 parse_consume(parser, TOK_CASE,
643 cstr("expected case statement"));
644 parse_expr(parser, PREC_LOW);
645 tmp->case_value = array_pop(parser->nodes);
646 }
647 parse_consume(parser, TOK_ASSIGN,
648 cstr("malformed case statement"));
649 parse_expr(parser, PREC_LOW);
650 tmp->case_expr = array_pop(parser->nodes);
651 array_push(node->match_cases, tmp, parser->storage);
652 }
653 } break;
654 case TOK_BREAK: {
655 node = node_alloc(parser, NODE_BREAK, prev);
656 if (!node) return;
657 } break;
658 case TOK_CONTINUE: {
659 node = node_alloc(parser, NODE_CONTINUE, prev);
660 if (!node) return;
661 } break;
662 case TOK_WHILE: {
663 node = node_alloc(parser, NODE_WHILE, prev);
664 if (!node) return;
665 parse_consume(parser, TOK_LPAREN,
666 cstr("expected '(' on while expression"));
667 parse_expr(parser, PREC_LOW);
668 node->while_cond = array_pop(parser->nodes);
669 parse_consume(parser, TOK_RPAREN,
670 cstr("expected ')' on while expression"));
671 parse_expr(parser, PREC_LOW);
672 node->while_expr = array_pop(parser->nodes);
673 } break;
674 default: return; // Unreachable.
227 } 675 }
228 push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); 676 array_push(parser->nodes, node, parser->storage);
229 return NULL;
230} 677}
231 678
232Node * 679void
233parse_def(Parser *parser) { 680parse_binary(Parser *parser) {
234 Token tok = next_token(parser); 681 if (parser->panic) return;
235 682 Token prev = parser->previous;
236 Node *symbol = parse_symbol(parser); 683#if DEBUG == 1
237 if (symbol == NULL) { 684 print("parsing binary ");
238 return NULL; 685 print_token(prev);
239 } 686#endif
240 687 ParseRule rule = parse_rules[prev.kind];
241 // TODO: Making type definitions mandatory for now until we introduce type 688 parse_expr(parser, rule.precedence + 1);
242 // inference. 689
243 Node *type = parse_type(parser); 690 Node *node;
244 if (type == NULL) { 691 switch (prev.kind) {
245 return NULL; 692 case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break;
246 } 693 case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break;
247 694 case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break;
248 Node *value = parse_next(parser); 695 case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break;
249 if (value == NULL) { 696 case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break;
250 return NULL; 697 case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break;
251 } 698 case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break;
252 699 case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break;
253 if (!consume_rparen(parser)) { 700 case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break;
254 return NULL; 701 case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break;
702 case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break;
703 case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break;
704 case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break;
705 case TOK_BITAND: {
706 node = node_alloc(parser, NODE_BITAND, prev);
707 } break;
708 case TOK_BITOR: {
709 node = node_alloc(parser, NODE_BITOR, prev);
710 } break;
711 case TOK_BITLSHIFT: {
712 node = node_alloc(parser, NODE_BITLSHIFT, prev);
713 } break;
714 case TOK_BITRSHIFT: {
715 node = node_alloc(parser, NODE_BITRSHIFT, prev);
716 } break;
717 default: {
718 parse_emit_err(parser, prev, cstr("unreachable"));
719 return;
720 }
255 } 721 }
256 722 if (!node) return;
257 Node *node = alloc_node(NODE_DEF); 723 node->right = array_pop(parser->nodes);
258 node->def.symbol = symbol; 724 node->left = array_pop(parser->nodes);
259 node->def.value = value; 725 array_push(parser->nodes, node, parser->storage);
260 node->def.type = type;
261 node->line = tok.line;
262 node->col = tok.col;
263 return node;
264} 726}
265 727
266Node * 728void
267parse_set(Parser *parser) { 729parse_number(Parser *parser) {
268 Token tok = next_token(parser); 730 if (parser->panic) return;
269 731 Token prev = parser->previous;
270 Node *symbol = parse_symbol(parser); 732#if DEBUG == 1
271 if (symbol == NULL) { 733 print("parsing number ");
272 return NULL; 734 print_token(prev);
273 } 735#endif
274 736 Node *node = NULL;
275 Node *value = parse_next(parser); 737 switch (prev.kind) {
276 if (value == NULL) { 738 case TOK_NUM_INT: {
277 return NULL; 739 if (str_has_prefix(prev.val, cstr("0x")) ||
278 } 740 str_has_prefix(prev.val, cstr("0b"))) {
279 741 node = node_alloc(parser, NODE_NUM_UINT, prev);
280 if (!consume_rparen(parser)) { 742 if (!node) return;
281 return NULL; 743 node->value.u = str_to_uint(prev.val);
744 } else {
745 node = node_alloc(parser, NODE_NUM_INT, prev);
746 if (!node) return;
747 node->value.i = str_to_int(prev.val);
748 }
749 } break;
750 case TOK_NUM_FLOAT: {
751 node = node_alloc(parser, NODE_NUM_FLOAT, prev);
752 if (!node) return;
753 node->value.d = str_to_float(prev.val);
754 } break;
755 case TOK_CHAR: {
756 node = node_alloc(parser, NODE_NUM_INT, prev);
757 if (!node) return;
758 node->value.i = prev.val.mem[1];
759 } break;
760 default: break;
282 } 761 }
283 762 array_push(parser->nodes, node, parser->storage);
284 Node *node = alloc_node(NODE_SET);
285 node->set.symbol = symbol;
286 node->set.value = value;
287 node->line = tok.line;
288 node->col = tok.col;
289 return node;
290} 763}
291 764
292Node * 765void
293parse_fun(Parser *parser) { 766parse_string(Parser *parser) {
294 Token tok = next_token(parser); 767 if (parser->panic) return;
295 768 Token prev = parser->previous;
296 Node *name = parse_symbol(parser); 769#if DEBUG == 1
297 if (name == NULL) { 770 print("parsing string ");
298 return NULL; 771 print_token(prev);
299 } 772#endif
300 773 Node *node = node_alloc(parser, NODE_STRING, prev);
301 Node *node = alloc_node(NODE_FUN); 774 if (!node) return;
302 node->fun.name = name; 775 node->value.str = str_remove_prefix(prev.val, cstr("\""));
303 array_init(node->fun.param_names, 0); 776 node->value.str = str_remove_suffix(node->value.str, cstr("\""));
304 array_init(node->fun.param_types, 0); 777 array_push(parser->nodes, node, parser->storage);
305 node->line = tok.line; 778}
306 node->col = tok.col;
307 779
308 // Parse parameter list and return type. 780void
309 if (!consume_lparen(parser)) { 781parse_symbol(Parser *parser) {
310 return NULL; 782 if (parser->panic) return;
311 } 783 Token prev = parser->previous;
312 while (true) { 784#if DEBUG == 1
313 Token next = peek_token(parser); 785 print("parsing symbol ");
314 if (next.type == TOKEN_RPAREN) { 786 print_token(prev);
315 next_token(parser); 787#endif
316 break; 788 if (prev.kind == TOK_AT) {
789 parse_consume(parser, TOK_SYMBOL,
790 cstr("expected symbol after '.' operator"));
791 parse_symbol(parser);
792 Node *node = array_pop(parser->nodes);
793 if (node) {
794 node->is_ptr = true;
795 array_push(parser->nodes, node, parser->storage);
317 } 796 }
318 Node *name = parse_symbol(parser); 797 return;
319 if (name == NULL) { 798 }
320 return NULL; 799 Node *node = node_alloc(parser, NODE_SYMBOL, prev);
800 if (!node) return;
801 if (parse_match(parser, TOK_DOT)) {
802 // Symbol chain.
803 parse_consume(parser, TOK_SYMBOL,
804 cstr("expected symbol after '.' operator"));
805 parse_symbol(parser);
806 node->next = array_pop(parser->nodes);
807 } else if (parse_match(parser, TOK_LCURLY)) {
808 // Struct literal.
809 node->kind = NODE_STRUCT_LIT;
810 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
811 Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current);
812 parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name"));
813 parse_symbol(parser);
814 field->field_name = array_pop(parser->nodes);
815 parse_consume(parser, TOK_ASSIGN, cstr("expected assignment"));
816 parse_expr(parser, PREC_LOW);
817 field->field_type = array_pop(parser->nodes);
818 array_push(node->elements, field, parser->storage);
321 } 819 }
322 Node *type = parse_type(parser); 820 } else if (parse_match(parser, TOK_LSQUARE)) {
323 if (type == NULL) { 821 node->kind = NODE_SYMBOL_IDX;
324 return NULL; 822 parse_consume(parser, TOK_NUM_INT, cstr("no array size given"));
823 parse_number(parser);
824 node->arr_size = array_pop(parser->nodes);
825 parse_consume(parser, TOK_RSQUARE,
826 cstr("unmatched brackets ']' in array type"));
827 if (parse_match(parser, TOK_DOT)) {
828 // Symbol chain.
829 parse_consume(parser, TOK_SYMBOL,
830 cstr("expected symbol after '.' operator"));
831 parse_symbol(parser);
832 node->next = array_pop(parser->nodes);
325 } 833 }
326 array_push(node->fun.param_names, name); 834 } else {
327 array_push(node->fun.param_types, type); 835 node = node_alloc(parser, NODE_SYMBOL, prev);
328 } 836 if (!node) return;
329 Node *ret_type = parse_type(parser);
330 if (ret_type == NULL) {
331 return NULL;
332 }
333 node->fun.return_type = ret_type;
334
335 Node *body = parse_next(parser);
336 if (body == NULL) {
337 return NULL;
338 }
339 node->fun.body = body;
340 if (!consume_rparen(parser)) {
341 return NULL;
342 } 837 }
343 838 node->value.sym = prev.val;
344 return node; 839 array_push(parser->nodes, node, parser->storage);
345} 840}
346 841
347Node * 842void
348parse_if(Parser *parser) { 843parse_grouping(Parser *parser) {
349 Token tok = next_token(parser); 844 if (parser->panic) return;
350 845#if DEBUG == 1
351 Node *node = alloc_node(NODE_IF); 846 print("parsing group ");
352 node->ifexpr.cond = NULL; 847 print_token(parser->previous);
353 node->ifexpr.expr_true = NULL; 848#endif
354 node->ifexpr.expr_false = NULL; 849 parse_expr(parser, PREC_LOW);
355 node->line = tok.line; 850 parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression"));
356 node->col = tok.col;
357
358 Node *cond = parse_next(parser);
359 if (cond == NULL) {
360 return NULL;
361 }
362 Node *expr_true = parse_next(parser);
363 if (expr_true == NULL) {
364 return NULL;
365 }
366 node->ifexpr.cond = cond;
367 node->ifexpr.expr_true = expr_true;
368 tok = peek_token(parser);
369
370 // Optional else statement.
371 if (tok.type != TOKEN_RPAREN) {
372 Node *expr_false = parse_next(parser);
373 if (expr_false == NULL) {
374 return NULL;
375 }
376 node->ifexpr.expr_false = expr_false;
377 }
378
379 if (!consume_rparen(parser)) {
380 return NULL;
381 }
382
383 return node;
384} 851}
385 852
386Node * 853void
387parse_paren(Parser *parser) { 854graph_node(Node *node) {
388 next_token(parser); // Skip paren. 855 if (node == NULL) return;
389 856 print("%d [width=2.5,shape=Mrecord,label=\"", node->id);
390 Token tok = peek_token(parser); 857 print("{ <type> %s | { L: %d | C: %d } } ", node_str[node->kind],
391 858 node->line, node->col);
392 switch (tok.type) { 859 switch (node->kind) {
393 // Builtin functions. 860 case NODE_NUM_INT: print("| Value: %d", node->value.i); break;
394 case TOKEN_ADD: 861 case NODE_NUM_UINT: print("| Value: %x", node->value.u); break;
395 case TOKEN_SUB: 862 case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break;
396 case TOKEN_MUL: 863 case NODE_STRING: print("| Value: %s", node->value.str); break;
397 case TOKEN_DIV: 864 case NODE_SYMBOL_IDX:
398 case TOKEN_MOD: 865 case NODE_STRUCT:
399 case TOKEN_NOT: 866 case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break;
400 case TOKEN_AND: 867 case NODE_SYMBOL:
401 case TOKEN_OR: 868 case NODE_TYPE: {
402 case TOKEN_EQ: 869 if (node->is_ptr) {
403 case TOKEN_LT: 870 print("| Name: @%s", node->value.sym);
404 case TOKEN_GT: 871 } else {
405 case TOKEN_LE: 872 print("| Name: %s", node->value.sym);
406 case TOKEN_GE: { return parse_builtin(parser); } break; 873 }
407 // Special functions. 874 } break;
408 case TOKEN_DEF: { return parse_def(parser); } break;
409 case TOKEN_SET: { return parse_set(parser); } break;
410 case TOKEN_FUN: { return parse_fun(parser); } break;
411 case TOKEN_IF: { return parse_if(parser); } break;
412 default: break; 875 default: break;
413 } 876 }
414 877 println("\"];");
415 return parse_funcall(parser); 878
416} 879 switch (node->kind) {
417 880 case NODE_COND:
418Node * 881 case NODE_MATCH: {
419parse_block(Parser *parser) { 882 if (node->match_expr) {
420 Token tok = next_token(parser); 883 println("%d:e->%d:w;", node->id, node->match_expr->id);
421 884 graph_node(node->match_expr);
422 Node *node = alloc_node(NODE_BLOCK); 885 }
423 array_init(node->block.expr, 0); 886 for (sz i = 0; i < array_size(node->match_cases); i++) {
424 node->line = tok.line; 887 Node *next = node->match_cases[i];
425 node->col = tok.col; 888 println("%d:e->%d:w;", node->id, next->id);
426 while (true) { 889 graph_node(next);
427 Token next = peek_token(parser); 890 }
428 if (next.type == TOKEN_RCURLY) { 891 } break;
429 next_token(parser); 892 case NODE_BLOCK:
430 break; 893 case NODE_STRUCT_LIT:
431 } 894 case NODE_COMPOUND_TYPE: {
432 Node *expr = parse_next(parser); 895 for (sz i = 0; i < array_size(node->elements); i++) {
433 if (expr == NULL) { 896 Node *next = node->elements[i];
434 return NULL; 897 println("%d:e->%d:w;", node->id, next->id);
435 } 898 graph_node(next);
436 array_push(node->block.expr, expr); 899 }
437 } 900 } break;
438 return node; 901 case NODE_ENUM:
439} 902 case NODE_STRUCT: {
440 903 for (sz i = 0; i < array_size(node->struct_field); i++) {
441Node * 904 Node *next = node->struct_field[i];
442parse_next(Parser *parser) { 905 println("%d:e->%d:w;", node->id, next->id);
443 Token tok = peek_token(parser); 906 graph_node(next);
444 switch (tok.type) { 907 }
445 case TOKEN_NUMBER: { return parse_number(parser); } break; 908 } break;
446 case TOKEN_STRING: { return parse_string(parser); } break; 909 case NODE_IF: {
447 case TOKEN_SYMBOL: { return parse_symbol(parser); } break; 910 if (node->cond_if) {
448 case TOKEN_TRUE: 911 println("%d:e->%d:w;", node->id, node->cond_if->id);
449 case TOKEN_FALSE: { return parse_bool(parser); } break; 912 graph_node(node->cond_if);
450 case TOKEN_LPAREN: { return parse_paren(parser); } break; 913 }
451 case TOKEN_EOF: { return NULL; } break; 914 if (node->cond_expr) {
452 case TOKEN_LCURLY: { return parse_block(parser); } break; 915 println("%d:e->%d:w;", node->id, node->cond_expr->id);
916 graph_node(node->cond_expr);
917 }
918 if (node->cond_else) {
919 println("%d:e->%d:w;", node->id, node->cond_else->id);
920 graph_node(node->cond_else);
921 }
922 } break;
923 case NODE_VAL_FIELD:
924 case NODE_SET:
925 case NODE_LET: {
926 if (node->var_name) {
927 println("%d:e->%d:w;", node->id, node->var_name->id);
928 graph_node(node->var_name);
929 }
930 if (node->var_type) {
931 println("%d:e->%d:w;", node->id, node->var_type->id);
932 graph_node(node->var_type);
933 }
934 if (node->var_val) {
935 println("%d:e->%d:w;", node->id, node->var_val->id);
936 graph_node(node->var_val);
937 }
938 } break;
453 default: { 939 default: {
454 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); 940 if (node->left) {
455 return NULL; 941 println("%d:e->%d:w;", node->id, node->left->id);
942 graph_node(node->left);
943 }
944 if (node->right) {
945 println("%d:e->%d:w;", node->id, node->right->id);
946 graph_node(node->right);
947 }
456 } break; 948 } break;
457 } 949 }
458} 950}
459 951
460bool 952void
461parse_roots(Parser *parser) { 953graph_ast(Node **roots) {
462 while (has_next(parser)) { 954 if (roots == NULL) return;
463 Token tok = peek_token(parser); 955 println("digraph ast {");
464 if (tok.type == TOKEN_EOF) { 956 println("rankdir=LR;");
465 break; 957 println("ranksep=\"0.95 equally\";");
466 } 958 println("nodesep=\"0.5 equally\";");
467 Node *node = parse_next(parser); 959 println("overlap=scale;");
468 if (node == NULL) { 960 println("bgcolor=\"transparent\";");
469 return false; 961 for (sz i = 0; i < array_size(roots); ++i) {
470 } 962 println("subgraph %d {", i);
471 array_push(parser->roots, node); 963 Node *root = roots[array_size(roots) - 1 - i];
472 } 964 graph_node(root);
473 return true; 965 println("}");
966 }
967 println("}");
474} 968}
475 969
476Root * 970#endif // PARSER_C
477parse(Token *tokens) {
478 Parser parser = {
479 .tokens = tokens,
480 .current_token = 0,
481 };
482 array_init(parser.roots, 0);
483
484 if (!parse_roots(&parser)) {
485 return NULL;
486 }
487 return parser.roots;
488}
diff --git a/src/parser.h b/src/parser.h
deleted file mode 100644
index 206ca4c..0000000
--- a/src/parser.h
+++ /dev/null
@@ -1,18 +0,0 @@
1#ifndef BDL_PARSER_H
2#define BDL_PARSER_H
3
4#include "lexer.h"
5#include "nodes.h"
6
7typedef Node* Root;
8
9typedef struct Parser {
10 Token *tokens;
11 size_t current_token;
12 Root *roots;
13} Parser;
14
15Root * parse(Token *tokens);
16Node * parse_next(Parser *parser);
17
18#endif // BDL_PARSER_H